script src = "https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js" $(document).ready(function() {
$('body').prepend('<div class=\"zoomDiv\"><img src=\"\" class=\"zoomImg\"></div>');
// onClick function for all plots (img's)
$('img:not(.zoomImg)').click(function() {
$('.zoomImg').attr('src', $(this).attr('src')).css({width: '100%'});
$('.zoomDiv').css({opacity: '1', width: 'auto', border: '1px solid white', borderRadius: '5px', position: 'fixed', top: '50%', left: '50%', marginRight: '-50%', transform: 'translate(-50%, -50%)', boxShadow: '0px 0px 50px #888888', zIndex: '50', overflow: 'auto', maxHeight: '100%'});
});
// onClick function for zoomImg
$('img.zoomImg').click(function() {
$('.zoomDiv').css({opacity: '0', width: '0%'});
});
});
<script src="hideOutput.js"></script> $(document).ready(function() {
$chunks = $('.fold');
$chunks.each(function () { // add button to source code chunks
if ( $(this).hasClass('s') ) {
$('pre.r', this).prepend("<div class=\"showopt\">Show Source</div><br style=\"line-height:22px;\"/>");
$('pre.r', this).children('code').attr('class', 'folded');
} // add button to output chunks
if ( $(this).hasClass('o') ) {
$('pre:not(.r)', this).has('code').prepend("<div class=\"showopt\">Show Output</div><br style=\"line-height:22px;\"/>");
$('pre:not(.r)', this).children('code:not(r)').addClass('folded'); // add button to plots
$(this).find('img').wrap('<pre class=\"plot\"></pre>');
$('pre.plot', this).prepend("<div class=\"showopt\">Show Plot</div><br style=\"line-height:22px;\"/>");
$('pre.plot', this).children('img').addClass('folded');
}
}); // hide all chunks when document is loaded
$('.folded').css('display', 'none') // function to toggle the visibility
$('.showopt').click(function() {
var label = $(this).html();
if (label.indexOf("Show") >= 0) {
$(this).html(label.replace("Show", "Hide"));
} else {
$(this).html(label.replace("Hide", "Show"));
}
$(this).siblings('code, img').slideToggle('fast', 'swing');
});
}); Generate flowcharts of joins
Kappa y MLs for classification
### 1.- What is Base_fiscalia_v9, why it has 49,970 IDs & 174,961 rows : paste0("Patients in PO records with unique combination of ID, RUC, end type and date of comission of the offense and offense ", a_tab11_lab_aft_d1 (p= 5,470; RUCs= 5,993; n= 8,291) --> should be only people that commit offenses, not victims)
### 2.- why the difference of 49,970- 49,252????
### 3.-
Base_fiscalia_v10b_dic_2022<-
sqldf("SELECT *
FROM CONS_C1_df_dup_SEP_2020_22_d AS x
LEFT JOIN (SELECT *
FROM Base_fiscalia_v9
) AS y
ON x.hash_key == y.id AND
x.dup = 1
") #2022-11-25 added dup // #changed the direction to past events, where age at discharge is greater than the age of commission //FEB 2023: WHERE ano_bd_first='2015' OR ano_bd_first='2016' OR ano_bd_first='2017' OR ano_bd_first='2018' OR ano_bd_first='2019'
message(
paste0("First, for each baseline admission of patients in C1 (", format(length(unique(CONS_C1_df_dup_SEP_2020_22_d$hash_key)), big.mark=","), "; n= ",format(nrow(CONS_C1_df_dup_SEP_2020_22_d), big.mark=","),"), we found the PO records in which is found as the offender (p=", format(length(unique(Base_fiscalia_v9$id)), big.mark=","), "; n= ",format(nrow(Base_fiscalia_v9), big.mark=","),") (p= ", format(length(unique(Base_fiscalia_v10b_dic_2022$hash_key)), big.mark=","), "; n= ",format(nrow(Base_fiscalia_v10b_dic_2022), big.mark=","),"). We did not exclude patients in C1 that were registered in databases before 2015 in this step (Feb 2023)")
)
invisible("49,970 patients")
Base_fiscalia_v11b_dic_2022<-
Base_fiscalia_v10b_dic_2022 %>%
#discrepancies in names of variables
janitor::clean_names() %>% #janitor::tabyl(!is.na(dob_imp_num))
#previously recoded,
dplyr::select(-dateofbirth_imp, -country, -victim, -id_victim, -crime_code_c , -reg_c, -end_type_2c, -cod_comunadelito, -cod_lugarocurrencia, -sex_imp, -region_delito, -filter, -id)%>%
plyr::rename(c("dateofbirth_imp_2"="dateofbirth_imp")) %>%
dplyr::ungroup() %>%
#selected the first row with distinct information regarding patient ID, case ID, crime code.
dplyr::group_by(hash_key, dateofbirth_imp, caseid, crime_code_group_rec_prof) %>%
dplyr::slice(1) %>%
dplyr::ungroup() %>%
dplyr::group_by(hash_key,dateofbirth_imp) %>%
summarise(n_off_acq= ifelse(sum(crime_code_group_rec_prof=="Acquisitive", na.rm=T)>0, 1,0), n_off_vio= ifelse(sum(crime_code_group_rec_prof=="Violent", na.rm=T)>0, 1,0), n_off_sud= ifelse(sum(crime_code_group_rec_prof== "Substance-related", na.rm=T)>0, 1,0), n_off_oth= ifelse(sum(crime_code_group_rec_prof== "Other", na.rm=T)>0, 1,0)) %>%
dplyr::ungroup() %>%
dplyr::mutate(n_off= rowSums(dplyr::select(., dplyr::starts_with("n_")), na.rm=T))
message(paste0( "Afterwards, we counted the offenses by type of offense (Acquisitive, violent, substance-related and other) and the total by each user before baseline admisson (p= ", format(length(unique(Base_fiscalia_v11b_dic_2022$hash_key)), big.mark=","), "; cases with records in P.O.= ",format(as.numeric(table(is.na(Base_fiscalia_v10b_dic_2022$dateofbirth_imp))[[2]]), big.mark=","),")." ))
#Registrar hurtos, robos, violencia intrafamiliar y otras acciones cometidas en las últimas 4 semanas
#Violencia Intrafamiliar (Maltrato físico o psicológico)
CONS_TOP_2022<-
# 107307
CONS_TOP%>%
# dplyr::left_join(subset(dplyr::mutate(dplyr::group_by(Base_fiscalia_v9, id), hash_rn=row_number())%>% ungroup(), hash_rn==1), by= c("HASH_KEY" = "id"))%>%
dplyr::left_join(subset(dplyr::mutate(dplyr::group_by(Base_fiscalia_v11b_dic_2022, hash_key), hash_rn=row_number())%>% ungroup(), hash_rn==1), by= c("HASH_KEY" = "hash_key"))%>%
dplyr::mutate(fech_ap_top_num= as.numeric(as.Date(str_sub(as.character(lubridate::parse_date_time(Fecha.Aplicación.TOP, c("%Y-%m-%d"),exact=T)),1,10))))%>% #No parse failures
dplyr::select(HASH_KEY, fech_ap_top_num, Fecha.Aplicación.TOP, dateofbirth_imp, Hurto, Robo, Venta.Drogas, Riña, Total.VIF, Otro) %>%
dplyr::filter(!is.na(HASH_KEY)) %>%
dplyr::mutate_at(vars("Hurto", "Robo", "Venta.Drogas", "Riña", "Otro"), ~ifelse(.=="S",1,0)) %>%
dplyr::mutate(Total.VIF= ifelse(Total.VIF>0,1,0))%>%
dplyr::mutate(tot_off_top = base::rowSums(dplyr::select(.,c(Hurto, Robo, Venta.Drogas, Riña, Total.VIF, Otro)), na.rm = T)) %>%
dplyr::mutate(dateofbirth_imp_num= as.numeric(dateofbirth_imp),
fech_ap_top= lubridate::parse_date_time(Fecha.Aplicación.TOP, c("%Y-%m-%d"),exact=T),
edad_a_ap_top_num= lubridate::time_length(lubridate::interval(dateofbirth_imp, fech_ap_top),unit="years"),
edad_b_ap_top_num= (fech_ap_top_num-dateofbirth_imp_num)/365.25,
edad_a_ap_top_num_lim= edad_a_ap_top_num-(1/12),
edad_b_ap_top_num_lim= edad_b_ap_top_num-(1/12)) %>%
dplyr::select(-dateofbirth_imp, -dateofbirth_imp_num) %>%
dplyr::filter(!is.na(edad_a_ap_top_num)) %>%
dplyr::group_by(HASH_KEY, edad_a_ap_top_num) %>%
dplyr::slice(1) %>%
dplyr::ungroup()message(paste0("Then, we standardized the varibles in TOP (e.g., dates format) and counted self-reported offenses, and also we standardized data with PO records (dates of birth) to get the age when the patient have answered TOP and the age previous [ (1/12)*365.25 ]=",round((1/12)*365.25,0)," days before TOP application. This resulted in ", format(nrow(CONS_TOP_2022), big.mark=","), " rows (combination of different application dates and patients) of ", format(length(unique(CONS_TOP_2022$HASH_KEY)), big.mark=","), " patients."))
Base_fiscalia_v9_dic_2022<-
Base_fiscalia_v9 %>%
dplyr::group_by(id, caseid, end_type, fec_comision_simple, crime_code_c) %>%
dplyr::slice(1) %>%
dplyr::ungroup()
message(
paste0("We reduced PO records to those that do not have the same sentence for the same offense committed at the same date in the same process (case ID/RUC) (p= ", format(length(unique(Base_fiscalia_v9_dic_2022$id)), big.mark=","), "; n= ",format(nrow(Base_fiscalia_v9_dic_2022), big.mark=","),")")
)
#busco en la base de datos de fiscalía, algún delito que haya cometido en el transcurso
#del último mes anterior a la aplicación del TOP
Base_fiscalia_v13c_dic_2022<-
sqldf("SELECT *
FROM CONS_TOP_2022 AS x
LEFT JOIN (SELECT *
FROM Base_fiscalia_v9_dic_2022
) AS y
ON x.HASH_KEY == y.id AND
x.edad_a_ap_top_num > y.age_offending_imp AND
x.edad_a_ap_top_num_lim < y.age_offending_imp") #2022-11-25 added dup // #changed the direction to past events, where age at discharge is greater than the age of commission
message(
paste0("Of the total of TOP applications (p= ", format(length(unique(CONS_TOP_2022$HASH_KEY)), big.mark=","), "; n= ",format(nrow(CONS_TOP_2022), big.mark=","),"), we looked for offenses comitted in the period of thirty days before (`x.edad_a_ap_top_num_lim`) the application of TOP (p= ", format(length(unique(Base_fiscalia_v13c_dic_2022$HASH_KEY)), big.mark=","), "; n= ",format(nrow(Base_fiscalia_v13c_dic_2022), big.mark=","),"). If this database contains more records, is beacause there may be some patients that had more than one record as a result of more than one application of TOP. This will be resolved in the next step")
)
#Luego, contar por cada combinación de HASH y fecha de aplicación, el número de delitos segun familai de delito
#crime_code_group
Base_fiscalia_v13c_dic_2022_2<-
Base_fiscalia_v13c_dic_2022 %>%
dplyr::select(tidyr::any_of(c("HASH_KEY", "fech_ap_top_num", "Fecha.Aplicación.TOP", "Hurto", "Robo", "Venta.Drogas", "Riña",
"Total.VIF", "Otro", "tot_off_top", "fech_ap_top", "edad_a_ap_top_num", "edad_b_ap_top_num",
"edad_a_ap_top_num_lim", "edad_b_ap_top_num_lim", "caseid", "end_type", "fec_comision_simple",
"crime_code_c", "crime_code_group_rec", "crime_code_group_rec_prof", "age_offending_imp")))
#HASH_KEY
#fech_ap_top_num
#Fecha.Aplicación.TOP
#Hurto
#Robo
#Venta.Drogas
#Riña
#Total.VIF
#Otro
#tot_off_top
#fech_ap_top
#edad_a_ap_top_num
#edad_b_ap_top_num
#edad_a_ap_top_num_lim
#edad_b_ap_top_num_lim
#caseid
#end_type
#fec_comision_simple
#crime_code_c
#crime_code_group_rec_prof
#crime_code_group_rec
#age_offending_imp
invisible("# 131 rows added, why?")
message("Patients than had more than one combination of application date and ID. That means, they answered TOP more than one time, and in at least 2 of these times they had previous records: "); Base_fiscalia_v13c_dic_2022_2 %>%
dplyr::count(HASH_KEY, fech_ap_top_num) %>%
dplyr::filter(n>1) %>%
dplyr::summarise(sum=sum(n), n=n(), tot=sum-n)
sum n tot
1 245 114 131
Base_fiscalia_v13c_dic_2022_3<-
Base_fiscalia_v13c_dic_2022_2 %>%
dplyr::ungroup() %>%
dplyr::mutate(vio=dplyr::if_else(crime_code_group_rec_prof=="Violent",1,0,0),
acq=dplyr::if_else(crime_code_group_rec_prof=="Acquisitive",1,0,0),
sud=dplyr::if_else(crime_code_group_rec_prof=="Substance-related",1,0,0),
oth=dplyr::if_else(crime_code_group_rec_prof=="Other",1,0,0)) %>%
dplyr::mutate(tot= rowSums(dplyr::select(., c("vio","acq","sud","oth")), na.rm=T)) %>%
dplyr::mutate(acq_top= rowSums(dplyr::select(., c("Hurto","Robo")), na.rm=T)) %>%
dplyr::mutate(sud_top= rowSums(dplyr::select(., c("Venta.Drogas")), na.rm=T)) %>%
dplyr::mutate(vio_top= rowSums(dplyr::select(., c("Riña", "Total.VIF")), na.rm=T)) %>%
dplyr::mutate(oth_top= rowSums(dplyr::select(., c("Otro")), na.rm=T)) %>%
dplyr::group_by(HASH_KEY, fech_ap_top_num) %>%
dplyr::mutate(vio=sum(vio,na.rm=T), acq=sum(acq,na.rm=T), sud=sum(sud,na.rm=T),
oth=sum(oth,na.rm=T), tot=sum(tot,na.rm=T), n=n()) %>%
dplyr::slice(1) %>%
dplyr::ungroup()
#Los que tienen todo pelado en caseid es porque no tienen fecha de comisión en los últimos 30 días
message(
paste0("For each combination of TOP application, count the number of previous offenses in PO records (those with 0s are people that did not committed an offense recorded by PO in the period previous to the application of TOP) (p= ", format(length(unique(Base_fiscalia_v13c_dic_2022_3$HASH_KEY)), big.mark=","), "; n= ",format(nrow(Base_fiscalia_v13c_dic_2022_3), big.mark=","),")")
)
#http://observatorio.ministeriodesarrollosocial.gob.cl/pobreza-comunal-2020
Comunas_PNDR <- readxl::read_excel("Clasificacion-comunas-PNDR.xlsx")%>%
dplyr::mutate(cod= dplyr::case_when(as.character(cod_com)=="16101"~"8401",
as.character(cod_com)=="16102"~"8402",
as.character(cod_com)=="16103"~"8406",
as.character(cod_com)=="16104"~"8407",
as.character(cod_com)=="16105"~"8410",
as.character(cod_com)=="16106"~"8411",
as.character(cod_com)=="16107"~"8413",
as.character(cod_com)=="16108"~"8418",
as.character(cod_com)=="16109"~"8421",
as.character(cod_com)=="16201"~"8414",
as.character(cod_com)=="16202"~"8403",
as.character(cod_com)=="16203"~"8404",
as.character(cod_com)=="16204"~"8408",
as.character(cod_com)=="16205"~"8412",
as.character(cod_com)=="16206"~"8415",
as.character(cod_com)=="16207"~"8420",
as.character(cod_com)=="16301"~"8416",
as.character(cod_com)=="16302"~"8405",
as.character(cod_com)=="16303"~"8409",
as.character(cod_com)=="16304"~"8417",
as.character(cod_com)=="16305"~"8419",
T~ as.character(cod_com)
))
#http://observatorio.ministeriodesarrollosocial.gob.cl/pobreza-comunal-2011
pobr_mult_2020<-readxl::read_excel("Estimaciones_de_Tasa_de_Pobreza_por_Ingresos_por_Comunas_2020_revisada2022_09.xlsx", skip=1) %>% dplyr::mutate(anio=2020) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2019<-readxl::read_excel("Estimaciones_de_Tasa_de_Pobreza_por_Ingresos_por_Comunas_2020_revisada2022_09.xlsx", skip=1) %>% dplyr::mutate(anio=2019) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2018<-readxl::read_excel("PLANILLA_Estimaciones_comunales_tasa_pobreza_por_ingresos_multidimensional_2017.xlsx", skip=1) %>% dplyr::mutate(anio=2018) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2017<-readxl::read_excel("PLANILLA_Estimaciones_comunales_tasa_pobreza_por_ingresos_multidimensional_2017.xlsx", skip=1) %>% dplyr::mutate(anio=2017) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2016<-readxl::read_excel("PLANILLA_Estimaciones_comunales_tasa_pobreza_por_ingresos_multidimensional_2015.xlsx", skip=1) %>% dplyr::mutate(anio=2016) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2015<-readxl::read_excel("PLANILLA_Estimaciones_comunales_tasa_pobreza_por_ingresos_multidimensional_2015.xlsx", skip=1) %>% dplyr::mutate(anio=2015) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2014<-readxl::read_excel("PLANILLA_Estimaciones_comunales_tasa_pobreza_por_ingresos_2013.xlsx", skip=1)%>% dplyr::mutate(anio=2014) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2013<-readxl::read_excel("PLANILLA_Estimaciones_comunales_tasa_pobreza_por_ingresos_2013.xlsx", skip=1)%>% dplyr::mutate(anio=2013) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2012<-readxl::read_excel("Estimacion_tasa_de_pobreza_comunal_2011_(nueva _metodologia).xlsx", skip=1)%>% dplyr::mutate(anio=2012) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2011<-readxl::read_excel("Estimacion_tasa_de_pobreza_comunal_2011_(nueva _metodologia).xlsx", skip=1)%>% dplyr::mutate(anio=2011) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2010<-readxl::read_excel("Estimacion_tasa_de_pobreza_comunal_2011_(nueva _metodologia).xlsx", skip=1)%>% dplyr::mutate(anio=2010) %>% dplyr::rename_at(vars( contains("Porcentaje de") ), ~"porc_pobr") %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2009<-readxl::read_excel("PobrezaporComunas_SAE_20092011.xlsx", skip=3)%>% dplyr::mutate(anio=2009) %>% dplyr::select(anio, everything()) %>% dplyr::select(1:5) %>% dplyr::rename_at(vars( contains("Incidencia pobreza") ), ~"porc_pobr") %>% dplyr::rename("Código"=2, "Nombre comuna"=3) %>% dplyr::mutate(Región=rep("")) %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2008<-readxl::read_excel("PobrezaporComunas_SAE_20092011.xlsx", skip=3)%>% dplyr::mutate(anio=2008) %>% dplyr::select(anio, everything()) %>% dplyr::select(1:5) %>% dplyr::rename_at(vars( contains("Incidencia pobreza") ), ~"porc_pobr") %>% dplyr::rename("Código"=2, "Nombre comuna"=3) %>% dplyr::mutate(Región=rep("")) %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
pobr_mult_2007<-readxl::read_excel("PobrezaporComunas_SAE_20092011.xlsx", skip=3)%>% dplyr::mutate(anio=2007) %>% dplyr::select(anio, everything()) %>% dplyr::select(1:5) %>% dplyr::rename_at(vars( contains("Incidencia pobreza") ), ~"porc_pobr") %>% dplyr::rename("Código"=2, "Nombre comuna"=3) %>% dplyr::mutate(Región=rep("")) %>% dplyr::select(anio, Código, Región, `Nombre comuna`, porc_pobr)
# replace comuna= "16103" if strpos(strlower(comuna),"8406")>0
# replace comuna= "16104" if strpos(strlower(comuna),"8407")>0
# replace comuna= "16105" if strpos(strlower(comuna),"8410")>0
# replace comuna= "16106" if strpos(strlower(comuna),"8411")>0
# replace comuna= "16107" if strpos(strlower(comuna),"8413")>0
# replace comuna= "16108" if strpos(strlower(comuna),"8418")>0
# replace comuna= "16109" if strpos(strlower(comuna),"8421")>0
# replace comuna= "16201" if strpos(strlower(comuna),"8414")>0
# replace comuna= "16202" if strpos(strlower(comuna),"8403")>0
# replace comuna= "16203" if strpos(strlower(comuna),"8404")>0
# replace comuna= "16204" if strpos(strlower(comuna),"8408")>0
# replace comuna= "16205" if strpos(strlower(comuna),"8412")>0
# replace comuna= "16206" if strpos(strlower(comuna),"8415")>0
# replace comuna= "16207" if strpos(strlower(comuna),"8420")>0
# replace comuna= "16301" if strpos(strlower(comuna),"8416")>0
# replace comuna= "16302" if strpos(strlower(comuna),"8405")>0
# replace comuna= "16303" if strpos(strlower(comuna),"8409")>0
# replace comuna= "16304" if strpos(strlower(comuna),"8417")>0
# replace comuna= "16305" if strpos(strlower(comuna),"8419")>0
pobr_mult_2007_2020<-
rbind.data.frame(pobr_mult_2007, pobr_mult_2008, pobr_mult_2009, pobr_mult_2010, pobr_mult_2011, pobr_mult_2012, pobr_mult_2013, pobr_mult_2014, pobr_mult_2015, pobr_mult_2016, pobr_mult_2017, pobr_mult_2018, pobr_mult_2019, pobr_mult_2020) %>%
dplyr::mutate(cod= dplyr::case_when(Código=="16101"~"8401",
Código=="16102"~"8402",
Código=="16103"~"8406",
Código=="16104"~"8407",
Código=="16105"~"8410",
Código=="16106"~"8411",
Código=="16107"~"8413",
Código=="16108"~"8418",
Código=="16109"~"8421",
Código=="16201"~"8414",
Código=="16202"~"8403",
Código=="16203"~"8404",
Código=="16204"~"8408",
Código=="16205"~"8412",
Código=="16206"~"8415",
Código=="16207"~"8420",
Código=="16301"~"8416",
Código=="16302"~"8405",
Código=="16303"~"8409",
Código=="16304"~"8417",
Código=="16305"~"8419",
T~ Código
))
#2023-02-01:classifications by municipality
class_centros <- readxl::read_excel(paste0(path,"/_ig_borquez/class_centros.xlsx"))%>%
tidylog::left_join(dplyr::distinct(CONS_C1_df_dup_SEP_2020, id_centro, nombre_centro), by=c("nombre_centro_1" ="nombre_centro"))%>%
dplyr::group_by(id_centro)%>%
slice(1)%>%
dplyr::ungroup()%>%
dplyr::mutate(id_centro=dplyr::case_when(grepl("Allende",nombre_centro_1) & is.na(id_centro)~ "475", T~ as.character(id_centro)))%>%
dplyr::group_by(id_centro)%>%
slice(1)
#dplyr::filter(grepl("Allende",nombre_centro_1))
CONS_C1_df_dup_SEP_2020_22_e<-
CONS_C1_df_dup_SEP_2020_22_d %>%
dplyr::mutate(comuna_residencia_cod_rec= as.character(readr::parse_number(as.character(comuna_residencia_cod))), anio_ing_tr= lubridate::epiyear(fech_ing)) %>% #glimpse()
dplyr::left_join(pobr_mult_2007_2020[,c("anio", "cod","porc_pobr")], by= c("comuna_residencia_cod_rec"="cod", "anio_ing_tr"="anio")) %>%
dplyr::left_join(Comunas_PNDR[,c("cod", "Clasificación")], by= c("comuna_residencia_cod_rec"="cod"))%>%
#2023-02-01
dplyr::left_join(class_centros[,c("id_centro", "nombre_centro_1", "classification")], by= "id_centro")%>%
purrr::when(nrow(.)>nrow(CONS_C1_df_dup_SEP_2020_22_d) ~ stop("More cases in the new database"), ~.) %>%
dplyr::mutate(via_adm_sus_prin_act= factor(dplyr::case_when(via_adm_sus_prin_act=="Injected Intravenously or Intramuscularly"~ "Other",T~as.character(via_adm_sus_prin_act)))) %>%
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
dplyr::mutate(con_quien_vive_joel=dplyr::case_when(
grepl("Solo$",con_quien_vive, ignore.case=T)~"Alone",
grepl("Con abuelos",con_quien_vive, ignore.case=T)~"Family of origin",
grepl("Con hermanos",con_quien_vive, ignore.case=T)~"Family of origin",
grepl("Con la madre \\(sola\\)",con_quien_vive, ignore.case=T)~"Family of origin",
grepl("Con otro pariente",con_quien_vive, ignore.case=T)~"Others",
grepl("con hijos y padres o familia",con_quien_vive, ignore.case=T)~"Family of origin",
grepl("con la pareja y padres o familia de origen",con_quien_vive, ignore.case=T)~"With couple/children",
grepl("con padres o familia de origen",con_quien_vive, ignore.case=T)~"Family of origin",
#2021-10-01
grepl("Únicamente con hijos",con_quien_vive, ignore.case=T)~"With couple/children",
grepl("Únicamente con pareja",con_quien_vive, ignore.case=T)~"With couple/children",
#2021-10-01
grepl("Con la Pareja, Hijos y Padres o Familia de Origen",con_quien_vive, ignore.case=T)~"With couple/children",
grepl("Hijos y Padres o Familia de Origen",con_quien_vive, ignore.case=T)~"Family of origin",
#2021-10-01
grepl("Únicamente con la pareja e hijos",con_quien_vive, ignore.case=T)~"With couple/children",
grepl("Con amigos",con_quien_vive, ignore.case=T)~"Others",
grepl("Con otro NO pariente",con_quien_vive, ignore.case=T)~"Others",
grepl("*Otros$",con_quien_vive, ignore.case=T)~"Others"))
#TO CHECK IF SOME MUNICIPALLITIES DID NOT JOIN
#dplyr::filter(is.na(porc_pobr)) %>% dplyr::select(comuna_residencia_cod, anio_ing_tr)
#TO CHECK IF SOME MUNICIPALLITIES DID NOT JOIN
#dplyr::filter(is.na(porc_pobr)) %>% dplyr::select(comuna_residencia_cod, anio_ing_tr)
#Private Therapeutic Community, ref
CONS_C1_df_dup_SEP_2020_22_e$clas_r <- relevel(factor(CONS_C1_df_dup_SEP_2020_22_e$Clasificación), ref = "Urbana")
CONS_C1_df_dup_SEP_2020_22_e$via_adm_sus_prin_act <- relevel(factor(CONS_C1_df_dup_SEP_2020_22_e$via_adm_sus_prin_act), ref = "Oral (drunk or eaten)")
cat("Recode to binary measures")
Recode to binary measures
Base_fiscalia_v13c_dic_2022_3$tot_off_top_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$tot_off_top>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$tot_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$tot>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$vio_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$vio>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$acq_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$acq>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$sud_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$sud>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$oth_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$oth>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$acq_top_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$acq_top>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$sud_top_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$sud_top>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$vio_top_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$vio_top>=1,1,0)
Base_fiscalia_v13c_dic_2022_3$oth_top_bin<-
ifelse(Base_fiscalia_v13c_dic_2022_3$oth_top>=1,1,0)
Join the data of the comparison between TOP reports and PO records, with the baseline information at admission of each patient.
vars_db<-
c("edad_a_ap_top_num", "end_type", "fech_ap_top_num", "sex", "duplicates_filtered", "id_centro", "macrozona", "nombre_region", "comuna_residencia_cod", "escolaridad_rec", "estado_conyugal_2", "freq_cons_sus_prin", "via_adm_sus_prin_act", "con_quien_vive_joel", "sus_principal_mod", "origen_ingreso_mod", "numero_de_hijos_mod", "tenencia_de_la_vivienda_mod", "sus_ini_mod_mvv", "condicion_ocupacional_corr", "dg_trs_cons_sus_or", "comorbidity_icd_10", "clas_r", "classification")
Base_fiscalia_v13c_dic_2022_4 <-
Base_fiscalia_v13c_dic_2022_3 %>%
tidylog::left_join(CONS_C1_df_dup_SEP_2020_22_e, by=c("HASH_KEY"="hash_key")) %>%
dplyr::select(any_of(c("HASH_KEY",vars_db, paste0(c("acq","sud","vio","oth"),"_bin"), paste0(c("acq","sud","vio","oth"),"_top_bin"), "numero_de_hijos_mod", "tot_off_top","tot_off_top_bin", "tot", "tot_bin", "edad_a_ap_top_num", "fech_ap_top"))) %>%
#multinomial
dplyr::mutate(discrepancies=dplyr::case_when(tot> tot_off_top~ "underreport", tot< tot_off_top~ "overreport", T~ "coincidence")) %>%
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
dplyr::mutate(tenencia_de_la_vivienda_mod2=
factor(dplyr::case_when(tenencia_de_la_vivienda_mod=="Allegado"~"Stays temporarily with a relative", tenencia_de_la_vivienda_mod=="Arrienda"~"Renting", tenencia_de_la_vivienda_mod=="Cedida"~"Owner/Transferred dwellings/Pays Dividends", tenencia_de_la_vivienda_mod=="Ocupación Irregular"~"Illegal Settlement", tenencia_de_la_vivienda_mod=="Otros"~"Others", tenencia_de_la_vivienda_mod=="Paga dividendo"~"Owner/Transferred dwellings/Pays Dividends", tenencia_de_la_vivienda_mod=="Propia"~"Owner/Transferred dwellings/Pays Dividends", T~NA_character_)))%>%
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
# dplyr::mutate(numero_de_hijos_mod_joel=dplyr::case_when(
# grepl("Si$",embarazo, ignore.case=T)~as.integer(numero_de_hijos_mod+1),
# T~as.integer(numero_de_hijos_mod)))%>%
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
dplyr::mutate(freq_cons_sus_prin=parse_factor(as.character(freq_cons_sus_prin),levels=c('Less than 1 day a week','2 to 3 days a week','4 to 6 days a week','1 day a week or more','Daily'), ordered =T,trim_ws=F,include_na =F)) %>% #, locale=locale(encoding = "Latin1")
dplyr::mutate(freq_cons_sus_prin=if_else(freq_cons_sus_prin=='Did not use','Less than 1 day a week',as.character(freq_cons_sus_prin),NA_character_))
attr(Base_fiscalia_v13c_dic_2022_4$escolaridad_rec,"label") <- "Educational Attainment"
attr(Base_fiscalia_v13c_dic_2022_4$freq_cons_sus_prin,"label") <- "Primary Substance at Admission Usage
Frequency"
attr(Base_fiscalia_v13c_dic_2022_4$fech_ap_top_num,"label") <- "Date of discharge"
attr(Base_fiscalia_v13c_dic_2022_4$edad_a_ap_top_num,"label") <- "Age at TOP application"
attr(Base_fiscalia_v13c_dic_2022_4$end_type,"label") <- "Date of discharge"
attr(Base_fiscalia_v13c_dic_2022_4$comorbidity_icd_10,"label") <- "Comorbidity ICD-10 (with amount of different diagnosis)"
attr(Base_fiscalia_v13c_dic_2022_4$con_quien_vive_joel,"label") <- "Living conditions"
attr(Base_fiscalia_v13c_dic_2022_4$acq_bin,"label") <- "Pre-treatment Criminality, Acquisitive (PO) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$sud_bin,"label") <- "Pre-treatment Criminality, Substance-related (PO) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$vio_bin,"label") <- "Pre-treatment Criminality, Violent (PO) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$oth_bin,"label") <- "Pre-treatment Criminality, Other (PO) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$acq_top_bin,"label") <- "Pre-treatment Criminality, Acquisitive (TOP) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$sud_top_bin,"label") <- "Pre-treatment Criminality, Substance-related (TOP) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$vio_top_bin,"label") <- "Pre-treatment Criminality, Violent (TOP) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$oth_top_bin,"label") <- "Pre-treatment Criminality, Other (TOP) (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$tot_off_top,"label") <- "Count of previous TOP offenses"
attr(Base_fiscalia_v13c_dic_2022_4$tot_off_top_bin,"label") <- "Count of previous TOP offenses (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$tot,"label") <- "Count of previous offenses"
attr(Base_fiscalia_v13c_dic_2022_4$tot_bin,"label") <- "Count of previous PO offenses (dichotomized)"
attr(Base_fiscalia_v13c_dic_2022_4$discrepancies,"label") <- "Relationship between total records (PO) and reports of offenses (TOP)"
attr(Base_fiscalia_v13c_dic_2022_4$classification,"label") <- "Types of ther. center"
attr(Base_fiscalia_v13c_dic_2022_4$clas_r,"label") <- "Rurality classification"
tbone_desc_merge<-
CreateTableOne(vars= setdiff(c(vars_db, paste0(c("acq","sud","vio","oth"),"_bin"), paste0(c("acq","sud","vio","oth"),"_top_bin"), "tot_off_top","tot_off_top_bin", "tot", "tot_bin", "edad_a_ap_top_num", "fech_ap_top"),c("id_centro", "comuna_residencia_cod")), data= Base_fiscalia_v13c_dic_2022_4, factorVars = c(setdiff(vars_db,c("edad_a_ap_top_num","fech_ap_top_num","id_centro")), paste0(c("acq","sud","vio","oth"),"_bin"), paste0(c("acq","sud","vio","oth"),"_top_bin")), smd=T, addOverall = T, includeNA=T, strata="discrepancies", test=T) #
as.data.frame.TableOne(tbone_desc_merge, smd=T, nonnormal= T)%>%
dplyr::mutate(char2=characteristic) %>%
tidyr::fill(char2) %>%
dplyr::select(char2,everything()) %>%
dplyr::mutate(level=ifelse(is.na(level),"[Missing]",level)) %>%
dplyr::mutate(char2=dplyr::case_when(characteristic=="NA"~NA_character_,T~as.character(characteristic))) %>%
format_cells(1, 1:length(names(.)), "bold") %>%
dplyr::select(-1) %>%
#knitr::kable(size=10, format="markdown",caption= "Summary descriptives, by discrepancy between PO records and TOP reports of offenses", escape=T)
knitr::kable(size=10, format="html",caption= "Summary descriptives, by discrepancy between PO records and TOP reports of offenses", escape=T) %>% kableExtra::kable_classic()%>%
kableExtra::scroll_box(width = "100%", height = "550px")
| characteristic | level | Overall | coincidence | overreport | underreport | p | test | SMD |
|---|---|---|---|---|---|---|---|---|
| n | 54159 | 41314 | 12285 | 560 | ||||
| Age at TOP application (median [IQR]) | 35.03 [28.54, 43.46] | 35.89 [29.13, 44.48] | 32.65 [26.94, 39.74] | 34.53 [27.62, 42.82] | <0.001 | nonnorm | 0.222 | |
| Date of discharge (%) | ACUERDO REPARATORIO | 100 ( 0.2) | 29 ( 0.1) | 19 ( 0.2) | 52 ( 9.3) | <0.001 | 8.497 | |
| SENTENCIA DEFINITIVA CONDENATORIA | 756 ( 1.4) | 208 ( 0.5) | 170 ( 1.4) | 378 ( 67.5) | ||||
| SOBRESEIMIENTO DEFINITIVO 240 | 171 ( 0.3) | 58 ( 0.1) | 42 ( 0.3) | 71 ( 12.7) | ||||
| SUSPENSI¿N CONDICIONAL DEL PROCEDIMIENTO | 123 ( 0.2) | 35 ( 0.1) | 29 ( 0.2) | 59 ( 10.5) | ||||
| [Missing] | 53009 (97.9) | 40984 ( 99.2) | 12025 (97.9) | 0 ( 0.0) | ||||
| Date of discharge (median [IQR]) | 17437.00 [17070.00, 17805.00] | 17463.00 [17107.00, 17821.00] | 17352.00 [16958.00, 17743.00] | 17348.50 [17003.75, 17718.00] | <0.001 | nonnorm | 0.148 | |
| sex (%) | Men | 42003 (77.6) | 32442 ( 78.5) | 9111 (74.2) | 450 ( 80.4) | <0.001 | 0.099 | |
| Women | 12156 (22.4) | 8872 ( 21.5) | 3174 (25.8) | 110 ( 19.6) | ||||
| Número de Tratamientos por HASH (Total)/Number of Treatments by User (Total) (%) | 1 | 32076 (59.2) | 25038 ( 60.6) | 6680 (54.4) | 358 ( 63.9) | <0.001 | 0.164 | |
| 2 | 13495 (24.9) | 10076 ( 24.4) | 3286 (26.7) | 133 ( 23.8) | ||||
| 3 | 5337 ( 9.9) | 3903 ( 9.4) | 1385 (11.3) | 49 ( 8.8) | ||||
| 4 | 2094 ( 3.9) | 1506 ( 3.6) | 575 ( 4.7) | 13 ( 2.3) | ||||
| 5 | 685 ( 1.3) | 468 ( 1.1) | 213 ( 1.7) | 4 ( 0.7) | ||||
| 6 | 304 ( 0.6) | 198 ( 0.5) | 104 ( 0.8) | 2 ( 0.4) | ||||
| 7 | 113 ( 0.2) | 80 ( 0.2) | 32 ( 0.3) | 1 ( 0.2) | ||||
| 8 | 54 ( 0.1) | 45 ( 0.1) | 9 ( 0.1) | 0 ( 0.0) | ||||
| 10 | 1 ( 0.0) | 0 ( 0.0) | 1 ( 0.0) | 0 ( 0.0) | ||||
| Macrozona del Centro del Último Registro(b)/Macrozones of the Center of the Last Entry(b) (%) | Center | 36329 (67.1) | 27629 ( 66.9) | 8353 (68.0) | 347 ( 62.0) | <0.001 | 0.157 | |
| North | 10288 (19.0) | 7509 ( 18.2) | 2665 (21.7) | 114 ( 20.4) | ||||
| South | 7542 (13.9) | 6176 ( 14.9) | 1267 (10.3) | 99 ( 17.7) | ||||
| Región del Centro del Último Registro(b)/Chilean Region of the Center of the Last Entry(b) (%) | Antofagasta (02) | 2437 ( 4.5) | 1810 ( 4.4) | 604 ( 4.9) | 23 ( 4.1) | <0.001 | 0.217 | |
| Araucanía (09) | 1715 ( 3.2) | 1413 ( 3.4) | 271 ( 2.2) | 31 ( 5.5) | ||||
| Arica (15) | 2134 ( 3.9) | 1604 ( 3.9) | 507 ( 4.1) | 23 ( 4.1) | ||||
| Atacama (03) | 2695 ( 5.0) | 1902 ( 4.6) | 758 ( 6.2) | 35 ( 6.2) | ||||
| Aysén (11) | 1200 ( 2.2) | 1053 ( 2.5) | 133 ( 1.1) | 14 ( 2.5) | ||||
| Biobío (08) | 3512 ( 6.5) | 2779 ( 6.7) | 700 ( 5.7) | 33 ( 5.9) | ||||
| Coquimbo (04) | 1964 ( 3.6) | 1414 ( 3.4) | 531 ( 4.3) | 19 ( 3.4) | ||||
| Los Lagos (10) | 2618 ( 4.8) | 2035 ( 4.9) | 549 ( 4.5) | 34 ( 6.1) | ||||
| Los Ríos (14) | 1097 ( 2.0) | 882 ( 2.1) | 204 ( 1.7) | 11 ( 2.0) | ||||
| Magallanes (12) | 912 ( 1.7) | 793 ( 1.9) | 110 ( 0.9) | 9 ( 1.6) | ||||
| Maule (07) | 3959 ( 7.3) | 3193 ( 7.7) | 722 ( 5.9) | 44 ( 7.9) | ||||
| Metropolitana (13) | 20944 (38.7) | 15832 ( 38.3) | 4917 (40.0) | 195 ( 34.8) | ||||
| Ñuble (16) | 495 ( 0.9) | 384 ( 0.9) | 107 ( 0.9) | 4 ( 0.7) | ||||
| O’Higgins (06) | 3622 ( 6.7) | 2725 ( 6.6) | 862 ( 7.0) | 35 ( 6.2) | ||||
| Tarapacá (01) | 1058 ( 2.0) | 779 ( 1.9) | 265 ( 2.2) | 14 ( 2.5) | ||||
| Valparaíso (05) | 3797 ( 7.0) | 2716 ( 6.6) | 1045 ( 8.5) | 36 ( 6.4) | ||||
| Educational Attainment (%) | 3-Completed primary school or less | 15594 (28.8) | 11780 ( 28.5) | 3659 (29.8) | 155 ( 27.7) | <0.001 | 0.072 | |
| 2-Completed high school or less | 29796 (55.0) | 22553 ( 54.6) | 6925 (56.4) | 318 ( 56.8) | ||||
| 1-More than high school | 8605 (15.9) | 6861 ( 16.6) | 1660 (13.5) | 84 ( 15.0) | ||||
| [Missing] | 164 ( 0.3) | 120 ( 0.3) | 41 ( 0.3) | 3 ( 0.5) | ||||
| Estado Conyugal/Marital Status (%) | Married/Shared living arrangements | 16249 (30.0) | 12675 ( 30.7) | 3409 (27.7) | 165 ( 29.5) | <0.001 | 0.119 | |
| Separated/Divorced | 5440 (10.0) | 4484 ( 10.9) | 900 ( 7.3) | 56 ( 10.0) | ||||
| Single | 31922 (58.9) | 23702 ( 57.4) | 7886 (64.2) | 334 ( 59.6) | ||||
| Widower | 452 ( 0.8) | 372 ( 0.9) | 77 ( 0.6) | 3 ( 0.5) | ||||
| [Missing] | 96 ( 0.2) | 81 ( 0.2) | 13 ( 0.1) | 2 ( 0.4) | ||||
| Primary Substance at Admission Usage Frequency (%) | 1 day a week or more | 3518 ( 6.5) | 2868 ( 6.9) | 618 ( 5.0) | 32 ( 5.7) | <0.001 | 0.143 | |
| 2 to 3 days a week | 14226 (26.3) | 11235 ( 27.2) | 2835 (23.1) | 156 ( 27.9) | ||||
| 4 to 6 days a week | 8645 (16.0) | 6549 ( 15.9) | 2000 (16.3) | 96 ( 17.1) | ||||
| Daily | 25152 (46.4) | 18520 ( 44.8) | 6388 (52.0) | 244 ( 43.6) | ||||
| Less than 1 day a week | 2411 ( 4.5) | 1984 ( 4.8) | 397 ( 3.2) | 30 ( 5.4) | ||||
| [Missing] | 207 ( 0.4) | 158 ( 0.4) | 47 ( 0.4) | 2 ( 0.4) | ||||
| via_adm_sus_prin_act (%) | Oral (drunk or eaten) | 19080 (35.2) | 15655 ( 37.9) | 3209 (26.1) | 216 ( 38.6) | <0.001 | 0.218 | |
| Intranasal (powder aspiration) | 9349 (17.3) | 7164 ( 17.3) | 2104 (17.1) | 81 ( 14.5) | ||||
| Other | 75 ( 0.1) | 46 ( 0.1) | 26 ( 0.2) | 3 ( 0.5) | ||||
| Smoked or Pulmonary Aspiration | 25655 (47.4) | 18449 ( 44.7) | 6946 (56.5) | 260 ( 46.4) | ||||
| Living conditions (%) | Alone | 5648 (10.4) | 4468 ( 10.8) | 1109 ( 9.0) | 71 ( 12.7) | <0.001 | 0.092 | |
| Family of origin | 24160 (44.6) | 18093 ( 43.8) | 5817 (47.4) | 250 ( 44.6) | ||||
| Others | 4821 ( 8.9) | 3655 ( 8.8) | 1119 ( 9.1) | 47 ( 8.4) | ||||
| With couple/children | 19530 (36.1) | 15098 ( 36.5) | 4240 (34.5) | 192 ( 34.3) | ||||
| Sustancia Principal de Consumo (Sólo más frecuentes)(f)/Primary or Main Substance of Consumption at Admission (Only more frequent)(f) (%) | Alcohol | 18313 (33.8) | 15082 ( 36.5) | 3021 (24.6) | 210 ( 37.5) | <0.001 | 0.254 | |
| Cocaine hydrochloride | 9081 (16.8) | 6970 ( 16.9) | 2030 (16.5) | 81 ( 14.5) | ||||
| Marijuana | 2982 ( 5.5) | 2308 ( 5.6) | 628 ( 5.1) | 46 ( 8.2) | ||||
| Other | 675 ( 1.2) | 507 ( 1.2) | 159 ( 1.3) | 9 ( 1.6) | ||||
| Cocaine paste | 23108 (42.7) | 16447 ( 39.8) | 6447 (52.5) | 214 ( 38.2) | ||||
| Origen de Ingreso (Primera Entrada)/Motive of Admission to Treatment (First Entry) (%) | Spontaneous | 24913 (46.0) | 18913 ( 45.8) | 5734 (46.7) | 266 ( 47.5) | <0.001 | 0.165 | |
| Assisted Referral | 5500 (10.2) | 4000 ( 9.7) | 1458 (11.9) | 42 ( 7.5) | ||||
| Other | 2724 ( 5.0) | 2041 ( 4.9) | 664 ( 5.4) | 19 ( 3.4) | ||||
| Justice Sector | 6743 (12.5) | 5429 ( 13.1) | 1224 (10.0) | 90 ( 16.1) | ||||
| Health Sector | 14279 (26.4) | 10931 ( 26.5) | 3205 (26.1) | 143 ( 25.5) | ||||
| Número de Hijos (Valor Max.)/Number of Children (Max. Value) (%) | 0 | 13054 (24.1) | 9763 ( 23.6) | 3169 (25.8) | 122 ( 21.8) | <0.001 | 0.113 | |
| 1 | 14892 (27.5) | 11240 ( 27.2) | 3495 (28.4) | 157 ( 28.0) | ||||
| 2 | 13371 (24.7) | 10377 ( 25.1) | 2849 (23.2) | 145 ( 25.9) | ||||
| 3 | 7701 (14.2) | 5977 ( 14.5) | 1634 (13.3) | 90 ( 16.1) | ||||
| 4 | 2956 ( 5.5) | 2248 ( 5.4) | 684 ( 5.6) | 24 ( 4.3) | ||||
| 5 | 1120 ( 2.1) | 888 ( 2.1) | 221 ( 1.8) | 11 ( 2.0) | ||||
| 6 | 368 ( 0.7) | 290 ( 0.7) | 75 ( 0.6) | 3 ( 0.5) | ||||
| 7 | 122 ( 0.2) | 89 ( 0.2) | 31 ( 0.3) | 2 ( 0.4) | ||||
| 8 | 52 ( 0.1) | 41 ( 0.1) | 11 ( 0.1) | 0 ( 0.0) | ||||
| 9 | 33 ( 0.1) | 27 ( 0.1) | 6 ( 0.0) | 0 ( 0.0) | ||||
| 10 | 12 ( 0.0) | 7 ( 0.0) | 5 ( 0.0) | 0 ( 0.0) | ||||
| [Missing] | 478 ( 0.9) | 367 ( 0.9) | 105 ( 0.9) | 6 ( 1.1) | ||||
| tenencia_de_la_vivienda_mod (%) | Illegal Settlement | 777 ( 1.4) | 545 ( 1.3) | 224 ( 1.8) | 8 ( 1.4) | <0.001 | 0.070 | |
| Others | 1326 ( 2.4) | 1007 ( 2.4) | 308 ( 2.5) | 11 ( 2.0) | ||||
| Owner/Transferred dwellings/Pays Dividends | 17865 (33.0) | 13789 ( 33.4) | 3895 (31.7) | 181 ( 32.3) | ||||
| Renting | 9535 (17.6) | 7358 ( 17.8) | 2080 (16.9) | 97 ( 17.3) | ||||
| Stays temporarily with a relative | 22163 (40.9) | 16755 ( 40.6) | 5166 (42.1) | 242 ( 43.2) | ||||
| [Missing] | 2493 ( 4.6) | 1860 ( 4.5) | 612 ( 5.0) | 21 ( 3.8) | ||||
| Sustancia de Inicio (Valor más vulnerable)/Starting Substance (Most vulnerable value) (%) | Alcohol | 31493 (58.1) | 24964 ( 60.4) | 6194 (50.4) | 335 ( 59.8) | <0.001 | 0.169 | |
| Cocaine hydrochloride | 1878 ( 3.5) | 1391 ( 3.4) | 471 ( 3.8) | 16 ( 2.9) | ||||
| Marijuana | 16183 (29.9) | 11629 ( 28.1) | 4399 (35.8) | 155 ( 27.7) | ||||
| Other | 1378 ( 2.5) | 990 ( 2.4) | 369 ( 3.0) | 19 ( 3.4) | ||||
| Cocaine paste | 2991 ( 5.5) | 2159 ( 5.2) | 802 ( 6.5) | 30 ( 5.4) | ||||
| [Missing] | 236 ( 0.4) | 181 ( 0.4) | 50 ( 0.4) | 5 ( 0.9) | ||||
| Condición Ocupacional Criterio Corregido(f)/Occupational Status Corrected(f) (%) | Employed | 23952 (44.2) | 19117 ( 46.3) | 4551 (37.0) | 284 ( 50.7) | <0.001 | 0.191 | |
| Inactive | 4785 ( 8.8) | 3604 ( 8.7) | 1135 ( 9.2) | 46 ( 8.2) | ||||
| Looking for a job for the first time | 130 ( 0.2) | 86 ( 0.2) | 42 ( 0.3) | 2 ( 0.4) | ||||
| No activity | 3948 ( 7.3) | 2911 ( 7.0) | 1000 ( 8.1) | 37 ( 6.6) | ||||
| Not seeking for work | 713 ( 1.3) | 537 ( 1.3) | 169 ( 1.4) | 7 ( 1.2) | ||||
| Unemployed | 20631 (38.1) | 15059 ( 36.5) | 5388 (43.9) | 184 ( 32.9) | ||||
| Diagnósico de Trastorno por Consumo de Sustancias(d)/Diagnosed of Substance Use Disorder(d) (%) | Drug dependence | 39726 (73.4) | 29730 ( 72.0) | 9585 (78.0) | 411 ( 73.4) | <0.001 | 0.094 | |
| Hazardous consumption | 14433 (26.6) | 11584 ( 28.0) | 2700 (22.0) | 149 ( 26.6) | ||||
| Comorbidity ICD-10 (with amount of different diagnosis) (%) | Diagnosis unknown (under study) | 6131 (11.3) | 4065 ( 9.8) | 1984 (16.1) | 82 ( 14.6) | <0.001 | 0.169 | |
| One | 25583 (47.2) | 19262 ( 46.6) | 6062 (49.3) | 259 ( 46.2) | ||||
| Two or more | 1416 ( 2.6) | 1065 ( 2.6) | 339 ( 2.8) | 12 ( 2.1) | ||||
| Without psychiatric comorbidity | 21029 (38.8) | 16922 ( 41.0) | 3900 (31.7) | 207 ( 37.0) | ||||
| Rurality classification (%) | Urbana | 40666 (75.1) | 30833 ( 74.6) | 9430 (76.8) | 403 ( 72.0) | <0.001 | 0.104 | |
| Mixta | 6826 (12.6) | 5265 ( 12.7) | 1493 (12.2) | 68 ( 12.1) | ||||
| Rural | 6666 (12.3) | 5216 ( 12.6) | 1362 (11.1) | 88 ( 15.7) | ||||
| [Missing] | 1 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 1 ( 0.2) | ||||
| Types of ther. center (%) | Private Therapeutic Community | 18578 (34.3) | 13371 ( 32.4) | 5033 (41.0) | 174 ( 31.1) | <0.001 | 0.167 | |
| Public Hospital | 9078 (16.8) | 7013 ( 17.0) | 1962 (16.0) | 103 ( 18.4) | ||||
| Public Mental Health Primary Care | 11352 (21.0) | 8756 ( 21.2) | 2466 (20.1) | 130 ( 23.2) | ||||
| Public Primary Care | 8502 (15.7) | 6969 ( 16.9) | 1448 (11.8) | 85 ( 15.2) | ||||
| Public Therapeutic Community | 3651 ( 6.7) | 2809 ( 6.8) | 802 ( 6.5) | 40 ( 7.1) | ||||
| [Missing] | 2998 ( 5.5) | 2396 ( 5.8) | 574 ( 4.7) | 28 ( 5.0) | ||||
| Pre-treatment Criminality, Acquisitive (PO) (dichotomized) (%) | 0 | 53735 (99.2) | 41200 ( 99.7) | 12169 (99.1) | 366 ( 65.4) | <0.001 | 0.694 | |
| 1 | 424 ( 0.8) | 114 ( 0.3) | 116 ( 0.9) | 194 ( 34.6) | ||||
| Pre-treatment Criminality, Substance-related (PO) (dichotomized) (%) | 0 | 53977 (99.7) | 41271 ( 99.9) | 12264 (99.8) | 442 ( 78.9) | <0.001 | 0.488 | |
| 1 | 182 ( 0.3) | 43 ( 0.1) | 21 ( 0.2) | 118 ( 21.1) | ||||
| Pre-treatment Criminality, Violent (PO) (dichotomized) (%) | 0 | 53870 (99.5) | 41222 ( 99.8) | 12226 (99.5) | 422 ( 75.4) | <0.001 | 0.541 | |
| 1 | 289 ( 0.5) | 92 ( 0.2) | 59 ( 0.5) | 138 ( 24.6) | ||||
| Pre-treatment Criminality, Other (PO) (dichotomized) (%) | 0 | 53836 (99.4) | 41222 ( 99.8) | 12214 (99.4) | 400 ( 71.4) | <0.001 | 0.601 | |
| 1 | 323 ( 0.6) | 92 ( 0.2) | 71 ( 0.6) | 160 ( 28.6) | ||||
| Pre-treatment Criminality, Acquisitive (TOP) (dichotomized) (%) | 0 | 49742 (91.8) | 41225 ( 99.8) | 7966 (64.8) | 551 ( 98.4) | <0.001 | 0.713 | |
| 1 | 4417 ( 8.2) | 89 ( 0.2) | 4319 (35.2) | 9 ( 1.6) | ||||
| Pre-treatment Criminality, Substance-related (TOP) (dichotomized) (%) | 0 | 52585 (97.1) | 41305 (100.0) | 10720 (87.3) | 560 (100.0) | <0.001 | 0.367 | |
| 1 | 1574 ( 2.9) | 9 ( 0.0) | 1565 (12.7) | 0 ( 0.0) | ||||
| Pre-treatment Criminality, Violent (TOP) (dichotomized) (%) | 0 | 44606 (82.4) | 41110 ( 99.5) | 2967 (24.2) | 529 ( 94.5) | <0.001 | 1.601 | |
| 1 | 9553 (17.6) | 204 ( 0.5) | 9318 (75.8) | 31 ( 5.5) | ||||
| Pre-treatment Criminality, Other (TOP) (dichotomized) (%) | 0 | 52519 (97.0) | 41279 ( 99.9) | 10686 (87.0) | 554 ( 98.9) | <0.001 | 0.384 | |
| 1 | 1640 ( 3.0) | 35 ( 0.1) | 1599 (13.0) | 6 ( 1.1) | ||||
| Count of previous TOP offenses (median [IQR]) | 0.00 [0.00, 0.00] | 0.00 [0.00, 0.00] | 1.00 [1.00, 2.00] | 0.00 [0.00, 0.00] | <0.001 | nonnorm | 1.604 | |
| Count of previous TOP offenses (dichotomized) (median [IQR]) | 0.00 [0.00, 0.00] | 0.00 [0.00, 0.00] | 1.00 [1.00, 1.00] | 0.00 [0.00, 0.00] | <0.001 | nonnorm | 7.002 | |
| Count of previous offenses (median [IQR]) | 0.00 [0.00, 0.00] | 0.00 [0.00, 0.00] | 0.00 [0.00, 0.00] | 1.00 [1.00, 1.00] | <0.001 | nonnorm | 2.303 | |
| Count of previous PO offenses (dichotomized) (median [IQR]) | 0.00 [0.00, 0.00] | 0.00 [0.00, 0.00] | 0.00 [0.00, 0.00] | 1.00 [1.00, 1.00] | <0.001 | nonnorm | 8.496 |
We compared the records of TOP (Hurto,Robo, Venta.Drogas, Riña, Total.VIF, Otro, & tot_off_top) with the records of PO (vio, acq, sud, oth & tot). We grouped TOPs self-reports of offenses related to burglary or robbery into acquisitive (acq_top), the reports of drug-selling into substance related (sud_top) and reports of fights and domestic violence into violent (vio_top) and Other into other (oth_top).
cat("::: {.panel}", "\n", "[Total]{.panel-name}", "\n")
cat("**Total records of offenses vs. TOP reported offenses**")
paste0("**Records from PO**")
paste0("**Reports from TOP**")
pol_p<-
polychor(Base_fiscalia_v13c_dic_2022_3$tot_off_top, Base_fiscalia_v13c_dic_2022_3$tot, std.err=T, ML=T)
pol_p2<-
psych::polychoric(table(Base_fiscalia_v13c_dic_2022_3$tot_off_top, Base_fiscalia_v13c_dic_2022_3$tot))
#rho : The (matrix) of tetrachoric/polychoric/biserial correlations
#tau : The normal equivalent of the cutpoints
#for ordinal variables, the same idea holds...but now we have more than one τ-cut to subdivide the normal distribution into regions associated with the different values of the ordinal measure..
# polychoric is appropriate when the manifest ordinal variables came from categorizing latent normal variables & not otherwise. (In practice, it's more like when you are willing to assume thi
#We start with a relative frequency pattern for our contingency table of ordinal data in the lower-left, and we find the optimal solution for correlation and tau-cuts to get us back to a bivariate normal distribution that would have comparable relative joint-frequencies.
cat("**Polychoric correlation: Total records of offenses vs. TOP reported offenses**")
message(
capture.output(print(pol_p))[c(2,3)]
)
cat("**Overdispersion**")
#The variance is not much greater than the mean, which suggests that we will have over-dispersion in the model.
# obtained with a poisson regression
cat("**Recode**")
#For more than two classes, these results are calculated comparing each factor level to the remaining levels (i.e. a "one versus all" approach).
# accuracy should be higher than No information rate (naive classifier) in order to be model significant.
#No information rate tests whether our classifier does better than random assignment
# A hypothesis test is also computed to evaluate whether the overall accuracy rate is greater than the rate of the largest class. P-Value [Acc > NIR]
#Null Error Rate: This is how often you would be wrong if you always predicted the majority class.
# but sometimes the best classifier for a particular application will sometimes have a higher error rate than the null error rate, as in the accuracy paradox
#if the incidence of category A is dominant, being found in 99% of cases, then predicting that every case is category A will have an accuracy of 99%
#Kappa statistics is a measure of how the classification results compare to values assigned by chance. P-value mcnemar, can produce NA values with sparse tables)
pander::pander(caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$tot_off_top_rec), factor(Base_fiscalia_v13c_dic_2022_3$tot)))
positive:
table:
| 0 | 1 | 2 | 3 | 4 | |
|---|---|---|---|---|---|
| 0 | 40984 | 479 | 35 | 2 | 1 |
| 1 | 7624 | 309 | 32 | 5 | 2 |
| 2 | 2442 | 146 | 21 | 4 | 0 |
| 3 | 1140 | 68 | 7 | 0 | 0 |
| 4 | 819 | 34 | 5 | 0 | 0 |
overall:
| Accuracy | Kappa | AccuracyLower | AccuracyUpper | AccuracyNull |
|---|---|---|---|---|
| 0.7628 | 0.0401 | 0.7592 | 0.7664 | 0.9788 |
| AccuracyPValue | McnemarPValue |
|---|---|
| 1 | NA |
byClass:
| Sensitivity | Specificity | Pos Pred Value | Neg Pred Value | |
|---|---|---|---|---|
| Class: 0 | 0.7732 | 0.5504 | 0.9875 | 0.05001 |
| Class: 1 | 0.2983 | 0.8557 | 0.03876 | 0.9843 |
| Class: 2 | 0.21 | 0.9521 | 0.008037 | 0.9985 |
| Class: 3 | 0 | 0.9776 | 0 | 0.9998 |
| Class: 4 | 0 | 0.9842 | 0 | 0.9999 |
| Precision | Recall | F1 | Prevalence | Detection Rate | |
|---|---|---|---|---|---|
| Class: 0 | 0.9875 | 0.7732 | 0.8673 | 0.9788 | 0.7567 |
| Class: 1 | 0.03876 | 0.2983 | 0.06861 | 0.01913 | 0.005705 |
| Class: 2 | 0.008037 | 0.21 | 0.01548 | 0.001846 | 0.0003877 |
| Class: 3 | 0 | 0 | NA | 0.0002031 | 0 |
| Class: 4 | 0 | 0 | NA | 5.539e-05 | 0 |
| Detection Prevalence | Balanced Accuracy | |
|---|---|---|
| Class: 0 | 0.7663 | 0.6618 |
| Class: 1 | 0.1472 | 0.577 |
| Class: 2 | 0.04825 | 0.581 |
| Class: 3 | 0.02243 | 0.4888 |
| Class: 4 | 0.01584 | 0.4921 |
mode: sens_spec
dots:
cat("**Kappa with ordinal weights**")
pander::pander(irrCAC::kappa2.table(table(ordered(Base_fiscalia_v13c_dic_2022_3$tot_off_top_rec), ordered(Base_fiscalia_v13c_dic_2022_3$tot)),weights = irrCAC::ordinal.weights(0:4)))
| coeff.name | coeff.val | coeff.se | coeff.ci | coeff.pval |
|---|---|---|---|---|
| Cohen’s Kappa | 0.0349 | 0.002059 | (0.031,0.039) | 0e+00 |
#really low
#To choose between linear and quadratic weights, ask yourself if the difference between being off by 1 vs. 2 categories is the same as the difference between being off by 2 vs. 3 categories. With linear weights, the penalty is always the same (e.g., 0.33 credit is subtracted for each additional category). However, this is not the case for quadratic weights, where penalties begin mild then grow harsher.
#CONS: Depends on the marginal distribution (prevalence) of the categories
cat("**Plot**")
ggstatsplot::ggscatterstats(tot_off_top, tot, data=Base_fiscalia_v13c_dic_2022_3,
type="nonparametric", point.width.jitter=.9, point.height.jitter=.9,
xlab="Number of TOP offenses", ylab="Number of PO offenses",
title="Relationship between Total counts of offenses from PO and TOP")
cat(":::", "\n")
cat("::: {.panel}", "\n", "[Acquisitive]{.panel-name}", "\n")
cat("**Total records of offenses vs. TOP reported offenses**")
paste0("**Records from PO**")
paste0("**Reports from TOP**")
pol_p_acq<-
polychor(Base_fiscalia_v13c_dic_2022_3$acq_top, Base_fiscalia_v13c_dic_2022_3$acq, std.err=T, ML=T)
pol_p2_acq<-
psych::polychoric(table(Base_fiscalia_v13c_dic_2022_3$acq_top, Base_fiscalia_v13c_dic_2022_3$acq))
cat("**Polychoric correlation: Total records of offenses vs. TOP reported offenses**")
message(capture.output(print(pol_p_acq))[c(2,3)])
cat("**Recode**")
pander::pander(tidy(caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$acq_top), factor(Base_fiscalia_v13c_dic_2022_3$acq_rec))))
| term | class | estimate | conf.low | conf.high | p.value |
|---|---|---|---|---|---|
| accuracy | NA | 0.916 | 0.9137 | 0.9184 | 1 |
| kappa | NA | 0.04983 | NA | NA | NA |
| mcnemar | NA | NA | NA | NA | 0 |
| sensitivity | 0 | 0.9213 | NA | NA | NA |
| specificity | 0 | 0.441 | NA | NA | NA |
| pos_pred_value | 0 | 0.9952 | NA | NA | NA |
| neg_pred_value | 0 | 0.04234 | NA | NA | NA |
| precision | 0 | 0.9952 | NA | NA | NA |
| recall | 0 | 0.9213 | NA | NA | NA |
| f1 | 0 | 0.9568 | NA | NA | NA |
| prevalence | 0 | 0.9922 | NA | NA | NA |
| detection_rate | 0 | 0.9141 | NA | NA | NA |
| detection_prevalence | 0 | 0.9184 | NA | NA | NA |
| balanced_accuracy | 0 | 0.6812 | NA | NA | NA |
| sensitivity | 1 | 0.2519 | NA | NA | NA |
| specificity | 1 | 0.951 | NA | NA | NA |
| pos_pred_value | 1 | 0.03692 | NA | NA | NA |
| neg_pred_value | 1 | 0.9942 | NA | NA | NA |
| precision | 1 | 0.03692 | NA | NA | NA |
| recall | 1 | 0.2519 | NA | NA | NA |
| f1 | 1 | 0.06439 | NA | NA | NA |
| prevalence | 1 | 0.007404 | NA | NA | NA |
| detection_rate | 1 | 0.001865 | NA | NA | NA |
| detection_prevalence | 1 | 0.05052 | NA | NA | NA |
| balanced_accuracy | 1 | 0.6014 | NA | NA | NA |
| sensitivity | 2 | 0.2609 | NA | NA | NA |
| specificity | 2 | 0.9691 | NA | NA | NA |
| pos_pred_value | 2 | 0.003569 | NA | NA | NA |
| neg_pred_value | 2 | 0.9997 | NA | NA | NA |
| precision | 2 | 0.003569 | NA | NA | NA |
| recall | 2 | 0.2609 | NA | NA | NA |
| f1 | 2 | 0.007042 | NA | NA | NA |
| prevalence | 2 | 0.0004247 | NA | NA | NA |
| detection_rate | 2 | 0.0001108 | NA | NA | NA |
| detection_prevalence | 2 | 0.03104 | NA | NA | NA |
| balanced_accuracy | 2 | 0.615 | NA | NA | NA |
cat("**Kappa with ordinal weights**")
pander::pander(irrCAC::kappa2.table(table(ordered(Base_fiscalia_v13c_dic_2022_3$acq_top), ordered(Base_fiscalia_v13c_dic_2022_3$acq_rec)),weights = irrCAC::ordinal.weights(0:2)))
| coeff.name | coeff.val | coeff.se | coeff.ci | coeff.pval |
|---|---|---|---|---|
| Cohen’s Kappa | 0.04807 | 0.004008 | (0.04,0.056) | 0e+00 |
cat("**Plot**")
ggstatsplot::ggscatterstats(acq_top, acq, data=Base_fiscalia_v13c_dic_2022_3,
type="nonparametric", point.width.jitter=.9, point.height.jitter=.9,
xlab="Number of TOP offenses", ylab="Number of PO offenses",
title="Relationship between counts of Acquisitive offenses from PO and TOP")
cat(":::", "\n")
cat("::: {.panel}", "\n", "[SUD]{.panel-name}", "\n")
cat("**Total records of offenses vs. TOP reported offenses**")
paste0("**Records from PO**")
paste0("**Reports from TOP**")
pol_p_sud<-
polychor(Base_fiscalia_v13c_dic_2022_3$sud_top, Base_fiscalia_v13c_dic_2022_3$sud, std.err=T, ML=T)
pol_p2_sud<-
psych::polychoric(table(Base_fiscalia_v13c_dic_2022_3$sud_top, Base_fiscalia_v13c_dic_2022_3$sud))
cat("**Polychoric correlation: Total records of offenses vs. TOP reported offenses**")
message(capture.output(print(pol_p_sud))[c(2,3)])
cat("**Recode**")
pander::pander(tidy(
caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$sud_top), factor(Base_fiscalia_v13c_dic_2022_3$sud_rec))
))
| term | class | estimate | conf.low | conf.high | p.value |
|---|---|---|---|---|---|
| accuracy | NA | 0.9682 | 0.9667 | 0.9697 | 1 |
| kappa | NA | 0.01342 | NA | NA | NA |
| mcnemar | NA | NA | NA | NA | 2.423e-246 |
| sensitivity | 0 | 0.9712 | NA | NA | NA |
| specificity | 0 | 0.09341 | NA | NA | NA |
| pos_pred_value | 0 | 0.9969 | NA | NA | NA |
| neg_pred_value | 0 | 0.0108 | NA | NA | NA |
| precision | 0 | 0.9969 | NA | NA | NA |
| recall | 0 | 0.9712 | NA | NA | NA |
| f1 | 0 | 0.9838 | NA | NA | NA |
| prevalence | 0 | 0.9966 | NA | NA | NA |
| detection_rate | 0 | 0.9679 | NA | NA | NA |
| detection_prevalence | 0 | 0.9709 | NA | NA | NA |
| balanced_accuracy | 0 | 0.5323 | NA | NA | NA |
cat(":::", "\n")
cat("::: {.panel}", "\n", "[Violent]{.panel-name}", "\n")
cat("**Total records of offenses vs. TOP reported offenses**")
paste0("**Records from PO**")
paste0("**Reports from TOP**")
pol_p_vio<-
polychor(Base_fiscalia_v13c_dic_2022_3$vio_top, Base_fiscalia_v13c_dic_2022_3$vio, std.err=T, ML=T)
pol_p2_vio<-
psych::polychoric(table(Base_fiscalia_v13c_dic_2022_3$vio_top, Base_fiscalia_v13c_dic_2022_3$vio))
cat("**Polychoric correlation: Total records of offenses vs. TOP reported offenses**")
message(capture.output(print(pol_p_vio))[c(2,3)])
cat("**Recode**")
pander::pander(tidy(
caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$vio_top), factor(Base_fiscalia_v13c_dic_2022_3$vio_rec))
))
| term | class | estimate | conf.low | conf.high | p.value |
|---|---|---|---|---|---|
| accuracy | NA | 0.8235 | 0.8203 | 0.8267 | 1 |
| kappa | NA | 0.01975 | NA | NA | NA |
| mcnemar | NA | NA | NA | NA | 0 |
| sensitivity | 0 | 0.8257 | NA | NA | NA |
| specificity | 0 | 0.564 | NA | NA | NA |
| pos_pred_value | 0 | 0.9972 | NA | NA | NA |
| neg_pred_value | 0 | 0.01706 | NA | NA | NA |
| precision | 0 | 0.9972 | NA | NA | NA |
| recall | 0 | 0.8257 | NA | NA | NA |
| f1 | 0 | 0.9034 | NA | NA | NA |
| prevalence | 0 | 0.9947 | NA | NA | NA |
| detection_rate | 0 | 0.8213 | NA | NA | NA |
| detection_prevalence | 0 | 0.8236 | NA | NA | NA |
| balanced_accuracy | 0 | 0.6949 | NA | NA | NA |
| sensitivity | 1 | 0.4326 | NA | NA | NA |
| specificity | 1 | 0.8553 | NA | NA | NA |
| pos_pred_value | 1 | 0.01541 | NA | NA | NA |
| neg_pred_value | 1 | 0.9965 | NA | NA | NA |
| precision | 1 | 0.01541 | NA | NA | NA |
| recall | 1 | 0.4326 | NA | NA | NA |
| f1 | 1 | 0.02976 | NA | NA | NA |
| prevalence | 1 | 0.005207 | NA | NA | NA |
| detection_rate | 1 | 0.002253 | NA | NA | NA |
| detection_prevalence | 1 | 0.1462 | NA | NA | NA |
| balanced_accuracy | 1 | 0.644 | NA | NA | NA |
| sensitivity | 2 | 0 | NA | NA | NA |
| specificity | 2 | 0.9698 | NA | NA | NA |
| pos_pred_value | 2 | 0 | NA | NA | NA |
| neg_pred_value | 2 | 0.9999 | NA | NA | NA |
| precision | 2 | 0 | NA | NA | NA |
| recall | 2 | 0 | NA | NA | NA |
| f1 | 2 | NA | NA | NA | NA |
| prevalence | 2 | 0.0001292 | NA | NA | NA |
| detection_rate | 2 | 0 | NA | NA | NA |
| detection_prevalence | 2 | 0.03023 | NA | NA | NA |
| balanced_accuracy | 2 | 0.4849 | NA | NA | NA |
cat("**Kappa with ordinal weights**")
pander::pander(
irrCAC::kappa2.table(table(ordered(Base_fiscalia_v13c_dic_2022_3$vio_top), ordered(Base_fiscalia_v13c_dic_2022_3$vio_rec)),weights = irrCAC::ordinal.weights(0:2))
)
| coeff.name | coeff.val | coeff.se | coeff.ci | coeff.pval |
|---|---|---|---|---|
| Cohen’s Kappa | 0.01956 | 0.00187 | (0.016,0.023) | 0e+00 |
cat("**Plot**")
ggstatsplot::ggscatterstats(vio_top, vio, data=Base_fiscalia_v13c_dic_2022_3,
type="nonparametric", point.width.jitter=.9, point.height.jitter=.9,
xlab="Number of TOP offenses", ylab="Number of PO offenses",
title="Relationship between counts of violent offenses from PO and TOP")
cat(":::", "\n")
cat("::: {.panel}", "\n", "[Other]{.panel-name}", "\n")
cat("**Total records of offenses vs. TOP reported offenses**")
paste0("**Records from PO**")
paste0("**Reports from TOP**")
pol_p_oth<-
polychor(Base_fiscalia_v13c_dic_2022_3$oth_top, Base_fiscalia_v13c_dic_2022_3$oth, std.err=T, ML=T)
pol_p2_oth<-
psych::polychoric(table(Base_fiscalia_v13c_dic_2022_3$oth_top, Base_fiscalia_v13c_dic_2022_3$oth))
cat("**Polychoric correlation: Total records of offenses vs. TOP reported offenses**")
message(capture.output(print(pol_p_oth))[c(2,3)])
cat("**Recode**")
pander::pander(tidy(
caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$oth_top), factor(Base_fiscalia_v13c_dic_2022_3$oth_rec))
))
| term | class | estimate | conf.low | conf.high | p.value |
|---|---|---|---|---|---|
| accuracy | NA | 0.9648 | 0.9632 | 0.9663 | 1 |
| kappa | NA | 0.01875 | NA | NA | NA |
| mcnemar | NA | NA | NA | NA | 1.654e-199 |
| sensitivity | 0 | 0.9701 | NA | NA | NA |
| specificity | 0 | 0.08669 | NA | NA | NA |
| pos_pred_value | 0 | 0.9944 | NA | NA | NA |
| neg_pred_value | 0 | 0.01707 | NA | NA | NA |
| precision | 0 | 0.9944 | NA | NA | NA |
| recall | 0 | 0.9701 | NA | NA | NA |
| f1 | 0 | 0.9821 | NA | NA | NA |
| prevalence | 0 | 0.994 | NA | NA | NA |
| detection_rate | 0 | 0.9643 | NA | NA | NA |
| detection_prevalence | 0 | 0.9697 | NA | NA | NA |
| balanced_accuracy | 0 | 0.5284 | NA | NA | NA |
cat(":::", "\n")
#
cat("Manually construct measure tables")
#http://rstudio-pubs-static.s3.amazonaws.com/370944_96c386c03ac54ef3bec4535d49e92890.html
confusion_table = table(Base_fiscalia_v13c_dic_2022_3$tot_off_top_bin, Base_fiscalia_v13c_dic_2022_3$tot_bin)
confusion_table[1,1] = 'TN'
confusion_table[1,2] = 'FN'
confusion_table[2,1] = 'FP'
confusion_table[2,2] = 'TP'
get_accuracy <- function(df, predicted, actual){
confusion_table = table(df[[predicted]],df[[actual]])
TP = confusion_table[2,2]
TN = confusion_table[1,1]
FN = confusion_table[1,2]
FP = confusion_table[2,1]
accuracy = round((TP + TN) / sum(TP,FP,TN,FN), 2)
return(accuracy)
}
get_classification_error_rate <- function(df, predicted, actual){
confusion_table = table(df[[predicted]],df[[actual]])
TP = confusion_table[2,2]
TN = confusion_table[1,1]
FN = confusion_table[1,2]
FP = confusion_table[2,1]
classification_error_rate = round((FP + FN) / sum(TP,FP,TN,FN),2)
return(classification_error_rate)
}
get_precision <- function(df, predicted, actual){
confusion_table = table(df[[predicted]],df[[actual]])
TP = confusion_table[2,2]
TN = confusion_table[1,1]
FN = confusion_table[1,2]
FP = confusion_table[2,1]
precision = round(TP / (TP + FP), 2)
return(precision)
}
get_sensitivity <- function(df, predicted, actual){
confusion_table = table(df[[predicted]],df[[actual]])
TP = confusion_table[2,2]
TN = confusion_table[1,1]
FN = confusion_table[1,2]
FP = confusion_table[2,1]
sensitivity = round(TP / (TP + FN), 2)
return(sensitivity)
}
get_specificity <- function(df, predicted, actual){
confusion_table = table(df[[predicted]],df[[actual]])
TP = confusion_table[2,2]
TN = confusion_table[1,1]
FN = confusion_table[1,2]
FP = confusion_table[2,1]
specificity = round(TN / (TN + FP), 2)
return(specificity)
}
get_f1_score <- function(df, predicted, actual){
confusion_table = table(df[[predicted]],df[[actual]])
TP = confusion_table[2,2]
TN = confusion_table[1,1]
FN = confusion_table[1,2]
FP = confusion_table[2,1]
precision = round(TP / (TP + FP), 2)
sensitivity = round(TP / (TP + FN), 2)
f1_score = round((2 * precision * sensitivity) / (precision + sensitivity), 2)
return(f1_score)
}
caret::confusionMatrix(factor(Base_fiscalia_v13c_dic_2022_3$tot_off_top_bin), factor(Base_fiscalia_v13c_dic_2022_3$tot_bin))
score = data.frame(accuracy=get_accuracy(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'),
classification_error_rate=get_classification_error_rate(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'),
precision=get_precision(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'),
sensitivity=get_sensitivity(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'),
specificity=get_specificity(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'),
f1_score=get_f1_score(Base_fiscalia_v13c_dic_2022_3, 'tot_off_top_bin', 'tot_bin'))
#We can also combine precision and recall into an F1 score. This is the harmonic mean of precision and recall.
knitr::kable(score, caption= "Table 1. Properties")# %>% kableExtra::kable_classic()
For ROC curves, we dichotomized into committing a crime or not against reporting or not (*_bin). Posteriorly, we calculated confidence intervals using bootstrap with 500 replications.
#Acquisitive SUD Violent Other
suppressMessages(suppressWarnings(library(ROCit)))
## Warning: package 'ROCit' was built under R version 3.5.2
message("Error in convertclass(class, reference = negref) : class must have exactly two unique values)")
cat("::::: {.panelset}", "\n")
cat("::: {.panel}", "\n", "[Total]{.panel-name}", "\n")
#FPR: 1-especificidad
#TPR: Sensibilidad
cat(":::", "\n")
cat("::: {.panel}", "\n", "[Acquisitive]{.panel-name}", "\n")
cat(":::", "\n")
cat("::: {.panel}", "\n", "[SUD]{.panel-name}", "\n")
cat(":::", "\n")
cat("::: {.panel}", "\n", "[Violent]{.panel-name}", "\n")
cat(":::", "\n")
cat("::: {.panel}", "\n", "[Other]{.panel-name}", "\n")
cat(":::", "\n")
cat(":::::", "\n")
mlogit.display2 <- function(multinom.model, decimal=2, alpha=.05) {
s <- summary(multinom.model)
z <- s$coefficients/s$standard.errors
pnorm.z <- pnorm(z)
pgroup <- cut(pnorm.z, c(0,0.0005,0.005,0.025,0.975, 0.995, 0.9995,1))
stars <-c("***","**","*","","*","**","***")
#x <-paste(round(s$coefficients,decimal),"/",round(s$standard.errors,decimal+1),stars[pgroup], sep="")
x <-paste(round(s$coefficients,decimal),"", "", sep="")
dim(x) <- dim(z)
colnames(x) <- colnames(s$coefficients)
rownames(x) <- rownames(s$coefficients)
x1 <- t( x)
x2 <- t(exp(s$coefficients))
x2 <- round(x2,decimal)
x2.1 <- t(exp(s$coefficients-qnorm(1-alpha/2)*s$standard.errors))
x2.1 <- round(x2.1,decimal)
x2.2 <- t(exp(s$coefficients+qnorm(1-alpha/2)*s$standard.errors))
x2.2 <- round(x2.2,decimal)
x2 <- paste(x2,"(", x2.1, ",", x2.2, ")", sep="")
dim(x2) <- dim(x1)
x2[1,] <- "-"
x3 <- cbind(x1[,1],x2[,1])
for(i in 2: (length(s$lab)-1)){x3 <- cbind(x3, cbind(x1[,i],x2[,i]))}
x4 <- rbind(c("Coeff./SE",paste("RRR(",100-100*alpha,"%CI) ",sep=""),"Coeff./SE",paste("RRR(",100-100*alpha,"%CI) ",sep="")),x3)
colnames.x4 <- c(s$lab[2],"")
for(i in 3:(length(s$lab))){colnames.x4 <- c(colnames.x4,c(s$lab[i],""))}
colnames(x4) <- colnames.x4
rownames(x4) <- c("",s$coefnames)
cat("\n")
cat(paste("Outcome =",as.character(s$term)[2],"; ","Referent group = ",s$lab[1],sep=""), "\n")
assign(paste0("output_", deparse(substitute(multinom.model))),data.table::data.table(x4, keep.rownames=T), envir = .GlobalEnv)
print.noquote(x4)
cat("\n")
cat("Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ", "\n")
cat("\n")
cat(paste("Residual Deviance:", round(s$deviance,decimal), "\n"))
cat(paste("AIC =", round(s$AIC,digits=decimal), "\n"))
cat("\n")
}
library(rpart)
library(rpart.plot)
library(partykit)
#Random Forest
library(randomForest)
library(epiDisplay)
Error in library(epiDisplay): there is no package called ‘epiDisplay’
form<-
as.formula(paste("discrepancies~ ", paste(setdiff(c(vars_db, "numero_de_hijos_mod", "tot_off_top","tot_off_top_bin", "tot", "tot_bin", "edad_a_ap_top_num", "fech_ap_top"), c("id_centro", "comuna_residencia_cod", "tot_off_top", "tot", "tot_off_top_bin", "tot_bin", "end_type", "fech_ap_top", "nombre_region")), collapse="+ "))) #paste0(c("acq","sud","vio","oth"),"_bin"), paste0(c("acq","sud","vio","oth"),"_top_bin"),
form_multilev<-
as.formula(paste("discrepancies~ ", paste(setdiff(c(vars_db, "numero_de_hijos_mod", "tot_off_top","tot_off_top_bin", "tot", "tot_bin", "edad_a_ap_top_num", "fech_ap_top"), c("id_centro", "comuna_residencia_cod", "tot_off_top", "tot", "tot_off_top_bin", "tot_bin", "end_type", "fech_ap_top", "nombre_region")), collapse="+ "), "| HASH_KEY"))
set.seed(2125)
#Increase the MaxNWts parameter by passing it directly
#Error in nnet.default(X, Y, w, mask = mask, size = 0, skip = TRUE, softmax = TRUE, : too many (2502) weights
#In both GLMs and nnet::multinom it is the case that covariates should be scaled to make sure that the fit converges.
invisible("Multinomial logistic")
logit <- nnet::multinom(formula= form, data=Base_fiscalia_v13c_dic_2022_4 %>% dplyr::mutate_if(is.numeric, ~scale(.)),MaxNWts=84581, maxit= 1e3)
# weights: 165 (108 variable)
initial value 52446.652049
iter 10 value 27392.213657
iter 20 value 27336.082334
iter 30 value 27262.308639
iter 40 value 27174.675508
iter 50 value 27133.124026
iter 60 value 27104.160304
iter 70 value 27084.782458
iter 80 value 27074.848586
iter 90 value 27063.590666
iter 100 value 27061.309495
iter 110 value 27061.080883
iter 120 value 27060.910452
final value 27060.909134
converged
bs <- function(formula, data, indices) {
d = data[indices,] # allows boot to select sample
fit = nnet::multinom(formula, data=d, Hess = TRUE, trace=F, MaxNWts=84581, maxit= 1e3)
s = summary(fit)
return( cbind(s$coefficients, s$standard.errors) )
}
# 5 replications
results = list()
set.seed(2125)
results <- boot(
data=Base_fiscalia_v13c_dic_2022_4 %>% dplyr::mutate_if(is.numeric, ~scale(.)), statistic=bs, R=2e3, parallel='multicore', formula=form
)
#mm <- nnet::multinom(discrepancies ~ .+0, data=Base_fiscalia_v13c_dic_2022_4 %>% dplyr::mutate_if(is.numeric, ~scale(.)), trace=F)
#lmtest::lrtest(mm, "Speciesversicolor")
#Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) :
#car::Anova(mm)
#https://stackoverflow.com/questions/33170335/bootstrap-multinomial-regression-in-r
# label the estimates
subModelNames <- colnames(results$t0)
varNames <- rownames(results$t0)
estNames <- apply(expand.grid(varNames,subModelNames),1,function(x) paste(x,collapse="_"))
colnames(results$t) <- estNames
library(car)
multinom_cis_norm<-round(exp(confint(results, level=0.95, type="norm")),2)
multinom_cis_perc<-round(exp(confint(results, level=0.95, type="perc")),2)
#multinom_cis_bca<-confint(results, level=0.95, type="bca") #slow
#In confint.boot(results, level = 0.95, type = "bca") :
# BCa method fails for this problem. Using 'perc' instead
multinom_cis_logit <- round(exp(confint(logit)),2)
multinom_cis_logit12 <-confint(logit)
# plot the results
#hist(results, legend="separate")
mlogit.display2(logit)
Outcome =discrepancies; Referent group = coincidence
overreport
Coeff./SE
(Intercept) -1.04
edad_a_ap_top_num -0.31
fech_ap_top_num -0.19
sexWomen 0.08
duplicates_filtered 0.06
macrozonaNorth 0.07
macrozonaSouth -0.13
escolaridad_rec.L -0.18
escolaridad_rec.Q -0.05
estado_conyugal_2Separated/Divorced -0.15
estado_conyugal_2Single -0.03
estado_conyugal_2Widower -0.05
freq_cons_sus_prin2 to 3 days a week 0.15
freq_cons_sus_prin4 to 6 days a week 0.24
freq_cons_sus_prinDaily 0.29
freq_cons_sus_prinLess than 1 day a week -0.07
via_adm_sus_prin_actIntranasal (powder aspiration) -0.13
via_adm_sus_prin_actOther 0.79
via_adm_sus_prin_actSmoked or Pulmonary Aspiration -0.22
con_quien_vive_joelFamily of origin 0.03
con_quien_vive_joelOthers 0.05
con_quien_vive_joelWith couple/children 0.11
sus_principal_modCocaine hydrochloride 0.21
sus_principal_modMarijuana 0.09
sus_principal_modOther 0.06
sus_principal_modCocaine paste 0.38
origen_ingreso_modAssisted Referral 0.04
origen_ingreso_modOther 0.08
origen_ingreso_modJustice Sector -0.24
origen_ingreso_modHealth Sector -0.08
numero_de_hijos_mod 0.05
tenencia_de_la_vivienda_modOthers -0.2
tenencia_de_la_vivienda_modOwner/Transferred dwellings/Pays Dividends -0.23
tenencia_de_la_vivienda_modRenting -0.25
tenencia_de_la_vivienda_modStays temporarily with a relative -0.27
sus_ini_mod_mvvCocaine hydrochloride 0.08
sus_ini_mod_mvvMarijuana 0.1
sus_ini_mod_mvvOther 0.17
sus_ini_mod_mvvCocaine paste -0.07
condicion_ocupacional_corrInactive 0.07
condicion_ocupacional_corrLooking for a job for the first time 0.36
condicion_ocupacional_corrNo activity -0.06
condicion_ocupacional_corrNot seeking for work -0.11
condicion_ocupacional_corrUnemployed 0.11
dg_trs_cons_sus_orHazardous consumption -0.1
comorbidity_icd_10One -0.25
comorbidity_icd_10Two or more -0.26
comorbidity_icd_10Without psychiatric comorbidity -0.49
clas_rMixta 0.09
clas_rRural 0.06
classificationPublic Hospital -0.08
classificationPublic Mental Health Primary Care -0.05
classificationPublic Primary Care -0.26
classificationPublic Therapeutic Community -0.06
RRR(95%CI)
(Intercept) -
edad_a_ap_top_num 0.74(0.71,0.76)
fech_ap_top_num 0.83(0.81,0.85)
sexWomen 1.09(1.03,1.15)
duplicates_filtered 1.07(1.04,1.09)
macrozonaNorth 1.07(1.01,1.14)
macrozonaSouth 0.88(0.81,0.95)
escolaridad_rec.L 0.84(0.8,0.88)
escolaridad_rec.Q 0.95(0.92,0.99)
estado_conyugal_2Separated/Divorced 0.86(0.79,0.95)
estado_conyugal_2Single 0.97(0.92,1.04)
estado_conyugal_2Widower 0.95(0.72,1.25)
freq_cons_sus_prin2 to 3 days a week 1.16(1.04,1.28)
freq_cons_sus_prin4 to 6 days a week 1.27(1.14,1.42)
freq_cons_sus_prinDaily 1.33(1.2,1.48)
freq_cons_sus_prinLess than 1 day a week 0.93(0.8,1.08)
via_adm_sus_prin_actIntranasal (powder aspiration) 0.88(0.59,1.33)
via_adm_sus_prin_actOther 2.21(1.27,3.84)
via_adm_sus_prin_actSmoked or Pulmonary Aspiration 0.8(0.6,1.08)
con_quien_vive_joelFamily of origin 1.03(0.94,1.13)
con_quien_vive_joelOthers 1.05(0.94,1.18)
con_quien_vive_joelWith couple/children 1.11(1.02,1.22)
sus_principal_modCocaine hydrochloride 1.23(0.81,1.86)
sus_principal_modMarijuana 1.09(0.8,1.5)
sus_principal_modOther 1.06(0.85,1.31)
sus_principal_modCocaine paste 1.46(1.08,1.97)
origen_ingreso_modAssisted Referral 1.04(0.96,1.12)
origen_ingreso_modOther 1.08(0.97,1.2)
origen_ingreso_modJustice Sector 0.79(0.73,0.85)
origen_ingreso_modHealth Sector 0.92(0.87,0.98)
numero_de_hijos_mod 1.05(1.02,1.08)
tenencia_de_la_vivienda_modOthers 0.82(0.66,1.01)
tenencia_de_la_vivienda_modOwner/Transferred dwellings/Pays Dividends 0.8(0.67,0.94)
tenencia_de_la_vivienda_modRenting 0.78(0.66,0.93)
tenencia_de_la_vivienda_modStays temporarily with a relative 0.77(0.65,0.91)
sus_ini_mod_mvvCocaine hydrochloride 1.08(0.96,1.22)
sus_ini_mod_mvvMarijuana 1.11(1.05,1.17)
sus_ini_mod_mvvOther 1.18(1.03,1.36)
sus_ini_mod_mvvCocaine paste 0.93(0.85,1.03)
condicion_ocupacional_corrInactive 1.07(0.99,1.17)
condicion_ocupacional_corrLooking for a job for the first time 1.43(0.95,2.14)
condicion_ocupacional_corrNo activity 0.94(0.86,1.03)
condicion_ocupacional_corrNot seeking for work 0.89(0.72,1.1)
condicion_ocupacional_corrUnemployed 1.11(1.06,1.18)
dg_trs_cons_sus_orHazardous consumption 0.91(0.86,0.96)
comorbidity_icd_10One 0.78(0.73,0.83)
comorbidity_icd_10Two or more 0.77(0.67,0.89)
comorbidity_icd_10Without psychiatric comorbidity 0.61(0.57,0.65)
clas_rMixta 1.09(1.02,1.17)
clas_rRural 1.06(0.98,1.14)
classificationPublic Hospital 0.92(0.86,0.99)
classificationPublic Mental Health Primary Care 0.95(0.89,1.01)
classificationPublic Primary Care 0.77(0.71,0.84)
classificationPublic Therapeutic Community 0.94(0.86,1.03)
underreport
Coeff./SE
(Intercept) -3.82
edad_a_ap_top_num -0.2
fech_ap_top_num -0.21
sexWomen -0.04
duplicates_filtered -0.08
macrozonaNorth 0.09
macrozonaSouth 0.18
escolaridad_rec.L -0.09
escolaridad_rec.Q -0.07
estado_conyugal_2Separated/Divorced -0.02
estado_conyugal_2Single -0.09
estado_conyugal_2Widower -0.81
freq_cons_sus_prin2 to 3 days a week 0.23
freq_cons_sus_prin4 to 6 days a week 0.27
freq_cons_sus_prinDaily 0.19
freq_cons_sus_prinLess than 1 day a week 0.4
via_adm_sus_prin_actIntranasal (powder aspiration) -8.29
via_adm_sus_prin_actOther 1.93
via_adm_sus_prin_actSmoked or Pulmonary Aspiration 0.98
con_quien_vive_joelFamily of origin -0.18
con_quien_vive_joelOthers -0.15
con_quien_vive_joelWith couple/children -0.32
sus_principal_modCocaine hydrochloride 8.18
sus_principal_modMarijuana -0.69
sus_principal_modOther -0.16
sus_principal_modCocaine paste -1.01
origen_ingreso_modAssisted Referral -0.27
origen_ingreso_modOther -0.4
origen_ingreso_modJustice Sector 0.09
origen_ingreso_modHealth Sector -0.08
numero_de_hijos_mod 0.1
tenencia_de_la_vivienda_modOthers -0.08
tenencia_de_la_vivienda_modOwner/Transferred dwellings/Pays Dividends 0.03
tenencia_de_la_vivienda_modRenting -0.03
tenencia_de_la_vivienda_modStays temporarily with a relative 0.11
sus_ini_mod_mvvCocaine hydrochloride -0.13
sus_ini_mod_mvvMarijuana -0.06
sus_ini_mod_mvvOther 0.22
sus_ini_mod_mvvCocaine paste 0.13
condicion_ocupacional_corrInactive -0.13
condicion_ocupacional_corrLooking for a job for the first time -0.28
condicion_ocupacional_corrNo activity -0.18
condicion_ocupacional_corrNot seeking for work -0.73
condicion_ocupacional_corrUnemployed -0.27
dg_trs_cons_sus_orHazardous consumption -0.17
comorbidity_icd_10One -0.44
comorbidity_icd_10Two or more -0.54
comorbidity_icd_10Without psychiatric comorbidity -0.56
clas_rMixta 0.01
clas_rRural 0.2
classificationPublic Hospital 0.14
classificationPublic Mental Health Primary Care 0.2
classificationPublic Primary Care -0.03
classificationPublic Therapeutic Community 0.11
RRR(95%CI)
(Intercept) -
edad_a_ap_top_num 0.82(0.73,0.92)
fech_ap_top_num 0.81(0.74,0.89)
sexWomen 0.96(0.75,1.24)
duplicates_filtered 0.92(0.83,1.02)
macrozonaNorth 1.09(0.84,1.42)
macrozonaSouth 1.2(0.92,1.57)
escolaridad_rec.L 0.92(0.74,1.13)
escolaridad_rec.Q 0.93(0.8,1.08)
estado_conyugal_2Separated/Divorced 0.98(0.69,1.37)
estado_conyugal_2Single 0.91(0.71,1.16)
estado_conyugal_2Widower 0.45(0.11,1.83)
freq_cons_sus_prin2 to 3 days a week 1.26(0.84,1.89)
freq_cons_sus_prin4 to 6 days a week 1.31(0.85,2.01)
freq_cons_sus_prinDaily 1.21(0.8,1.82)
freq_cons_sus_prinLess than 1 day a week 1.5(0.88,2.55)
via_adm_sus_prin_actIntranasal (powder aspiration) 0(0,0)
via_adm_sus_prin_actOther 6.87(1.53,30.82)
via_adm_sus_prin_actSmoked or Pulmonary Aspiration 2.66(0.52,13.69)
con_quien_vive_joelFamily of origin 0.84(0.6,1.17)
con_quien_vive_joelOthers 0.86(0.56,1.32)
con_quien_vive_joelWith couple/children 0.73(0.52,1.02)
sus_principal_modCocaine hydrochloride 3551.35(3049.91,4135.23)
sus_principal_modMarijuana 0.5(0.09,2.68)
sus_principal_modOther 0.85(0.33,2.2)
sus_principal_modCocaine paste 0.36(0.07,1.89)
origen_ingreso_modAssisted Referral 0.76(0.53,1.09)
origen_ingreso_modOther 0.67(0.39,1.13)
origen_ingreso_modJustice Sector 1.1(0.84,1.43)
origen_ingreso_modHealth Sector 0.92(0.73,1.15)
numero_de_hijos_mod 1.1(0.99,1.23)
tenencia_de_la_vivienda_modOthers 0.93(0.35,2.42)
tenencia_de_la_vivienda_modOwner/Transferred dwellings/Pays Dividends 1.03(0.48,2.23)
tenencia_de_la_vivienda_modRenting 0.97(0.44,2.12)
tenencia_de_la_vivienda_modStays temporarily with a relative 1.12(0.52,2.43)
sus_ini_mod_mvvCocaine hydrochloride 0.88(0.5,1.53)
sus_ini_mod_mvvMarijuana 0.94(0.75,1.18)
sus_ini_mod_mvvOther 1.25(0.73,2.12)
sus_ini_mod_mvvCocaine paste 1.13(0.75,1.71)
condicion_ocupacional_corrInactive 0.88(0.62,1.25)
condicion_ocupacional_corrLooking for a job for the first time 0.75(0.1,5.51)
condicion_ocupacional_corrNo activity 0.83(0.57,1.23)
condicion_ocupacional_corrNot seeking for work 0.48(0.15,1.52)
condicion_ocupacional_corrUnemployed 0.76(0.61,0.95)
dg_trs_cons_sus_orHazardous consumption 0.84(0.67,1.06)
comorbidity_icd_10One 0.64(0.49,0.84)
comorbidity_icd_10Two or more 0.58(0.31,1.08)
comorbidity_icd_10Without psychiatric comorbidity 0.57(0.43,0.76)
clas_rMixta 1.01(0.76,1.35)
clas_rRural 1.22(0.92,1.63)
classificationPublic Hospital 1.15(0.86,1.54)
classificationPublic Mental Health Primary Care 1.22(0.94,1.59)
classificationPublic Primary Care 0.97(0.71,1.33)
classificationPublic Therapeutic Community 1.12(0.77,1.63)
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual Deviance: 54121.82
AIC = 54337.82
tibble(output_logit) %>%
dplyr::slice(-1) %>%
tidyr::separate(V2, sep="\\(", into= c("RRR_ov","CI_ov")) %>%
tidyr::separate(CI_ov, sep="\\,", into= c("ci_lo_ov","ci_up_ov")) %>%
tidyr::separate(V4, sep="\\(", into= c("RRR_und","CI_und")) %>%
tidyr::separate(CI_und, sep="\\,", into= c("ci_lo_und","ci_up_und")) %>%
dplyr::mutate(ci_up_ov= gsub("\\)","",ci_up_ov)) %>%
dplyr::mutate(ci_up_und= gsub("\\)","",ci_up_und)) %>%
#output_logit %>%
knitr::kable("markdown", caption="Outputs from the multinomial model")
| rn | overreport | RRR_ov | ci_lo_ov | ci_up_ov | underreport | RRR_und | ci_lo_und | ci_up_und |
|---|---|---|---|---|---|---|---|---|
| (Intercept) | -1.04 | - | -3.82 | - | ||||
| edad_a_ap_top_num | -0.31 | 0.74 | 0.71 | 0.76 | -0.2 | 0.82 | 0.73 | 0.92 |
| fech_ap_top_num | -0.19 | 0.83 | 0.81 | 0.85 | -0.21 | 0.81 | 0.74 | 0.89 |
| sexWomen | 0.08 | 1.09 | 1.03 | 1.15 | -0.04 | 0.96 | 0.75 | 1.24 |
| duplicates_filtered | 0.06 | 1.07 | 1.04 | 1.09 | -0.08 | 0.92 | 0.83 | 1.02 |
| macrozonaNorth | 0.07 | 1.07 | 1.01 | 1.14 | 0.09 | 1.09 | 0.84 | 1.42 |
| macrozonaSouth | -0.13 | 0.88 | 0.81 | 0.95 | 0.18 | 1.2 | 0.92 | 1.57 |
| escolaridad_rec.L | -0.18 | 0.84 | 0.8 | 0.88 | -0.09 | 0.92 | 0.74 | 1.13 |
| escolaridad_rec.Q | -0.05 | 0.95 | 0.92 | 0.99 | -0.07 | 0.93 | 0.8 | 1.08 |
| estado_conyugal_2Separated/Divorced | -0.15 | 0.86 | 0.79 | 0.95 | -0.02 | 0.98 | 0.69 | 1.37 |
| estado_conyugal_2Single | -0.03 | 0.97 | 0.92 | 1.04 | -0.09 | 0.91 | 0.71 | 1.16 |
| estado_conyugal_2Widower | -0.05 | 0.95 | 0.72 | 1.25 | -0.81 | 0.45 | 0.11 | 1.83 |
| freq_cons_sus_prin2 to 3 days a week | 0.15 | 1.16 | 1.04 | 1.28 | 0.23 | 1.26 | 0.84 | 1.89 |
| freq_cons_sus_prin4 to 6 days a week | 0.24 | 1.27 | 1.14 | 1.42 | 0.27 | 1.31 | 0.85 | 2.01 |
| freq_cons_sus_prinDaily | 0.29 | 1.33 | 1.2 | 1.48 | 0.19 | 1.21 | 0.8 | 1.82 |
| freq_cons_sus_prinLess than 1 day a week | -0.07 | 0.93 | 0.8 | 1.08 | 0.4 | 1.5 | 0.88 | 2.55 |
| via_adm_sus_prin_actIntranasal (powder aspiration) | -0.13 | 0.88 | 0.59 | 1.33 | -8.29 | 0 | 0 | 0 |
| via_adm_sus_prin_actOther | 0.79 | 2.21 | 1.27 | 3.84 | 1.93 | 6.87 | 1.53 | 30.82 |
| via_adm_sus_prin_actSmoked or Pulmonary Aspiration | -0.22 | 0.8 | 0.6 | 1.08 | 0.98 | 2.66 | 0.52 | 13.69 |
| con_quien_vive_joelFamily of origin | 0.03 | 1.03 | 0.94 | 1.13 | -0.18 | 0.84 | 0.6 | 1.17 |
| con_quien_vive_joelOthers | 0.05 | 1.05 | 0.94 | 1.18 | -0.15 | 0.86 | 0.56 | 1.32 |
| con_quien_vive_joelWith couple/children | 0.11 | 1.11 | 1.02 | 1.22 | -0.32 | 0.73 | 0.52 | 1.02 |
| sus_principal_modCocaine hydrochloride | 0.21 | 1.23 | 0.81 | 1.86 | 8.18 | 3551.35 | 3049.91 | 4135.23 |
| sus_principal_modMarijuana | 0.09 | 1.09 | 0.8 | 1.5 | -0.69 | 0.5 | 0.09 | 2.68 |
| sus_principal_modOther | 0.06 | 1.06 | 0.85 | 1.31 | -0.16 | 0.85 | 0.33 | 2.2 |
| sus_principal_modCocaine paste | 0.38 | 1.46 | 1.08 | 1.97 | -1.01 | 0.36 | 0.07 | 1.89 |
| origen_ingreso_modAssisted Referral | 0.04 | 1.04 | 0.96 | 1.12 | -0.27 | 0.76 | 0.53 | 1.09 |
| origen_ingreso_modOther | 0.08 | 1.08 | 0.97 | 1.2 | -0.4 | 0.67 | 0.39 | 1.13 |
| origen_ingreso_modJustice Sector | -0.24 | 0.79 | 0.73 | 0.85 | 0.09 | 1.1 | 0.84 | 1.43 |
| origen_ingreso_modHealth Sector | -0.08 | 0.92 | 0.87 | 0.98 | -0.08 | 0.92 | 0.73 | 1.15 |
| numero_de_hijos_mod | 0.05 | 1.05 | 1.02 | 1.08 | 0.1 | 1.1 | 0.99 | 1.23 |
| tenencia_de_la_vivienda_modOthers | -0.2 | 0.82 | 0.66 | 1.01 | -0.08 | 0.93 | 0.35 | 2.42 |
| tenencia_de_la_vivienda_modOwner/Transferred dwellings/Pays Dividends | -0.23 | 0.8 | 0.67 | 0.94 | 0.03 | 1.03 | 0.48 | 2.23 |
| tenencia_de_la_vivienda_modRenting | -0.25 | 0.78 | 0.66 | 0.93 | -0.03 | 0.97 | 0.44 | 2.12 |
| tenencia_de_la_vivienda_modStays temporarily with a relative | -0.27 | 0.77 | 0.65 | 0.91 | 0.11 | 1.12 | 0.52 | 2.43 |
| sus_ini_mod_mvvCocaine hydrochloride | 0.08 | 1.08 | 0.96 | 1.22 | -0.13 | 0.88 | 0.5 | 1.53 |
| sus_ini_mod_mvvMarijuana | 0.1 | 1.11 | 1.05 | 1.17 | -0.06 | 0.94 | 0.75 | 1.18 |
| sus_ini_mod_mvvOther | 0.17 | 1.18 | 1.03 | 1.36 | 0.22 | 1.25 | 0.73 | 2.12 |
| sus_ini_mod_mvvCocaine paste | -0.07 | 0.93 | 0.85 | 1.03 | 0.13 | 1.13 | 0.75 | 1.71 |
| condicion_ocupacional_corrInactive | 0.07 | 1.07 | 0.99 | 1.17 | -0.13 | 0.88 | 0.62 | 1.25 |
| condicion_ocupacional_corrLooking for a job for the first time | 0.36 | 1.43 | 0.95 | 2.14 | -0.28 | 0.75 | 0.1 | 5.51 |
| condicion_ocupacional_corrNo activity | -0.06 | 0.94 | 0.86 | 1.03 | -0.18 | 0.83 | 0.57 | 1.23 |
| condicion_ocupacional_corrNot seeking for work | -0.11 | 0.89 | 0.72 | 1.1 | -0.73 | 0.48 | 0.15 | 1.52 |
| condicion_ocupacional_corrUnemployed | 0.11 | 1.11 | 1.06 | 1.18 | -0.27 | 0.76 | 0.61 | 0.95 |
| dg_trs_cons_sus_orHazardous consumption | -0.1 | 0.91 | 0.86 | 0.96 | -0.17 | 0.84 | 0.67 | 1.06 |
| comorbidity_icd_10One | -0.25 | 0.78 | 0.73 | 0.83 | -0.44 | 0.64 | 0.49 | 0.84 |
| comorbidity_icd_10Two or more | -0.26 | 0.77 | 0.67 | 0.89 | -0.54 | 0.58 | 0.31 | 1.08 |
| comorbidity_icd_10Without psychiatric comorbidity | -0.49 | 0.61 | 0.57 | 0.65 | -0.56 | 0.57 | 0.43 | 0.76 |
| clas_rMixta | 0.09 | 1.09 | 1.02 | 1.17 | 0.01 | 1.01 | 0.76 | 1.35 |
| clas_rRural | 0.06 | 1.06 | 0.98 | 1.14 | 0.2 | 1.22 | 0.92 | 1.63 |
| classificationPublic Hospital | -0.08 | 0.92 | 0.86 | 0.99 | 0.14 | 1.15 | 0.86 | 1.54 |
| classificationPublic Mental Health Primary Care | -0.05 | 0.95 | 0.89 | 1.01 | 0.2 | 1.22 | 0.94 | 1.59 |
| classificationPublic Primary Care | -0.26 | 0.77 | 0.71 | 0.84 | -0.03 | 0.97 | 0.71 | 1.33 |
| classificationPublic Therapeutic Community | -0.06 | 0.94 | 0.86 | 1.03 | 0.11 | 1.12 | 0.77 | 1.63 |
# knitr::kable("html", caption="Outputs from the multinomial model") %>%
# kableExtra::kable_classic()
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#Base_fiscalia_v13c_dic_2022_4$discrepancies <- factor(Base_fiscalia_v13c_dic_2022_4$discrepancies,
# levels = c("admin", "manage", "custodial"), ordered = FALSE)
# bw_vglm <- VGAM::vglm(formula= form, family = multinomial,
# data = bwmale2)
# coef(bw_vglm, matrix = TRUE)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#https://rubenfcasal.github.io/aprendizaje_estadistico/cart-con-el-paquete-rpart.html
set.seed(2125)
ind <- sample(x = 2, size = nrow(Base_fiscalia_v13c_dic_2022_4), replace = T, prob = c(0.7, 0.3))
train <- Base_fiscalia_v13c_dic_2022_4[ind == 1, ]
test <- Base_fiscalia_v13c_dic_2022_4[ind == 2, ]
mod1.2 <- rpart(form, data = train)
#mod1 #print(mod1)
#summary(mod1)
rpart.plot(mod1.2)
rpart.rules(mod1.2, style = "tall")
form2<-
as.formula(paste("discrepancies~ ", paste(setdiff(c(vars_db, "numero_de_hijos_mod", "edad_a_ap_top_num", "fech_ap_top"), c("id_centro", "comuna_residencia_cod")), collapse="+ ")))
form2_multilev<-
as.formula(paste("discrepancies~ ", paste(setdiff(c(vars_db, "numero_de_hijos_mod", "edad_a_ap_top_num", "fech_ap_top"), c("id_centro", "comuna_residencia_cod")), collapse="+ "), "| HASH_KEY"))
mod1.3 <- rpart(form2, data = train, cp=0)
head(mod1.3$cptable, 10)
xerror <- mod1.3$cptable[,"xerror"]
imin.xerror <- which.min(xerror)
# Valor óptimo
mod1.3$cptable[imin.xerror, ]
upper.xerror <- xerror[imin.xerror] + mod1.3$cptable[imin.xerror, "xstd"]
icp <- min(which(xerror <= upper.xerror))
cp <- mod1.3$cptable[icp, "CP"]
tree <- prune(mod1.3, cp = cp)
rpart.plot(tree, box.palette = "Blues")
invisible("can take more than 15 hours, because the model was taking municipallity and center ID!")
#tm_cl <- mlogit::mlogit(formula= form_multilev, data=Base_fiscalia_v13c_dic_2022_4)#, reflevel = "coincidence")
#pivot_longer(Base_fiscalia_v13c_dic_2022_4, id=c("HASH_KEY", "edad_a_ap_top_num"))
#modelsummary(list("Conditional logit"=tm_cl),
# fmt=3, estimate="{estimate}{stars}")
a_tab1_lab_aft_d<- paste0('Original C1 Dataset \n(p= ', formatC(CONS_C1%>% dplyr::distinct(HASH_KEY)%>% nrow(), format='f', big.mark=',', digits=0), ';\np= ', formatC(nrow(CONS_C1), format='f', big.mark=',', digits=0),')')
a_tab2_lab_aft_d<- paste0('•Remove duplicated entries\\\\\\l•Overlapping treatments of patients\\\\\\l•Intermediate treatment events (continuous referrals) \\\\\\l')
a_tab3_lab_aft_d<- paste0(' C1 Dataset \n(p= ', formatC(CONS_C1_df_dup_SEP_2020%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0), ';\nn= ', formatC(nrow(CONS_C1_df_dup_SEP_2020), format='f', big.mark=',', digits=0),')')
a_tab4_lab_aft_d<- paste0('Original Prosecutors Office\n(p= ',Base_fiscalia_v2%>% dplyr::distinct(rut_enc_saf)%>% nrow() %>% format(big.mark=','), ';\nCauses= ',Base_fiscalia_v2%>% dplyr::distinct(ruc)%>% nrow() %>% format(big.mark=','), ';\nRel.=',Base_fiscalia_v2%>%dplyr::distinct(idrelacion)%>%nrow()%>%format(big.mark=','), ';\nRUC_Vic_Imp=',Base_fiscalia_v2%>%dplyr::mutate(rel=paste0(ruc,"_",idsujeto_victima,"_",idsujeto_imputado,"_","iddelito"))%>%dplyr::distinct(rel)%>%nrow()%>%format(big.mark=','), ';\nn= ',format(nrow(Base_fiscalia_v2),big.mark=","),')')
#crimes committed after study follow-up
a_tab5_lab_aft_d1<-
paste0("(p= ",format(nrow(unique(subset(Base_fiscalia_v2,fec_comision_simple>as.Date("2019-11-13"),"rut_enc_saf"))),big.mark=","),"; RUCs= ", format(nrow(unique(subset(Base_fiscalia_v2,fec_comision_simple>as.Date("2019-11-13"),"ruc"))), big.mark=","),";n= ", format(nrow(subset(Base_fiscalia_v2,fec_comision_simple>as.Date("2019-11-13"),"rut_enc_saf")), big.mark=","),")")
#erase entries with missing values in fec_comision_simple y termino_relacion_simple
leftovers_Base_fiscalia_v3<-
Base_fiscalia_v3 %>%
dplyr::left_join(after_imp_Base_fiscalia_v3_db[,c("rut_enc_saf","imp_birth_date","flowch_age")], by="rut_enc_saf") %>%
dplyr::rename("obs"="flowch_age") %>%
dplyr::mutate(imp_birth_date=dplyr::case_when(!is.na(imp_birth_date)~imp_birth_date,T~fec_nacimiento_simple))%>% dplyr::mutate(edad_comision_imp=as.numeric(fec_comision_simple-imp_birth_date)/365.25) %>% dplyr::mutate(edad_ter_rel_imp=as.numeric(termino_relacion_simple-imp_birth_date)/365.25) %>%
#arrange the rut from the first date of comission of a crime, but we are not detecting if he/she is the victim or not
dplyr::arrange(rut_enc_saf, edad_comision_imp) %>% #566884
dplyr::filter(dplyr::case_when(!is.na(edad_comision_imp)~T,T~F)) %>% #566644
dplyr::filter(imp_birth_date=="1900-01-01"|is.na(imp_birth_date))
a_tab5_lab_aft_d12<-
paste0("(p= ",format(nrow(dplyr::distinct(leftovers_Base_fiscalia_v3,rut_enc_saf)),big.mark=","),"; RUCs= ",
format(nrow(dplyr::distinct(leftovers_Base_fiscalia_v3,ruc)), big.mark=","),";n= ",
format(nrow(leftovers_Base_fiscalia_v3), big.mark=","),")")
#minor to 14 years old
a_tab5_lab_aft_d13<-
paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v4,edad_comision_imp<14),rut_enc_saf)),big.mark=","),"; RUCs= ", format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v4,edad_comision_imp<14),ruc)), big.mark=","),";n= ", format(nrow(dplyr::filter(Base_fiscalia_v4,edad_comision_imp<14)), big.mark=","),")")
#Remove duplicated entries
a_tab5_lab_aft_d2<-
paste0("(p= ",format(length(unique(eliminated_duplicates$rut_enc_saf)),big.mark=","),"; RUCs= ",
format(length(unique(eliminated_duplicates$ruc)), big.mark=","),";n= ",
format(nrow(eliminated_duplicates), big.mark=","),")")
#before 2010
a_tab5_lab_aft_d3<-
paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,fec_comision_simple<"2010-01-01"),rut_enc_saf)),big.mark=","),"; RUCs= ",
format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,fec_comision_simple<"2010-01-01"),ruc)), big.mark=","),";n= ",
format(nrow(dplyr::filter(Base_fiscalia_v7,fec_comision_simple<"2010-01-01")), big.mark=","),")")
#remove administrative annulment
a_tab5_lab_aft_d4<-
paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="ANULACI¿N ADMINISTRATIVA"),rut_enc_saf)),big.mark=","),"; RUCs= ",
format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="ANULACI¿N ADMINISTRATIVA"),ruc)), big.mark=","),";n= ",
format(nrow(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="ANULACI¿N ADMINISTRATIVA")), big.mark=","),")")
#remove grouped to another case
a_tab5_lab_aft_d5<-
paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="AGRUPACI¿N A OTRO CASO"),rut_enc_saf)),big.mark=","),"; RUCs= ",
format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="AGRUPACI¿N A OTRO CASO"),ruc)), big.mark=","),";n= ",
format(nrow(dplyr::filter(Base_fiscalia_v7,agrupa_terminos=="AGRUPACI¿N A OTRO CASO")), big.mark=","),")")
a_tab5_lab_aft_d<- paste0('•Filter crimes committed after study follow-up period',a_tab5_lab_aft_d1,'\\\\\\l•Remove duplicated entries',a_tab5_lab_aft_d2,'\\\\\\l•Correct dates (birth, comission of crime, end of judicial proceedings), missing nationality and sex\\\\\\l•Erase entries with missing values in comission of crime, end of judicial proceedings',a_tab5_lab_aft_d12,'\\\\\\l•Erase entries with values in comission of crime when minor to 14 years old after imputation',a_tab5_lab_aft_d13,'\\\\\\l•Filter crimes committed before study follow-up',a_tab5_lab_aft_d3,'\\\\\\l•Filter records with cause of end of the proceedings= administrative annulment',a_tab5_lab_aft_d4,'\\\\\\l•Filter records with cause of end of the proceedings= grouped to another case',a_tab5_lab_aft_d5,'\\\\\\l')
a_tab6_lab_aft_d<- paste0("O.P. Dataset \n(p= ", dplyr::distinct(Base_fiscalia_v8, rut_enc_saf)%>% nrow()%>% formatC(big.mark = ","), ";\nn= ",formatC(nrow(Base_fiscalia_v8),big.mark = ","),")")
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#DISCARD not coded as an offender
a_tab7_lab_aft_d1<-
paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v8,!grepl("SI",encontrado_como_imputado)),rut_enc_saf)),big.mark=","),"; RUCs= ",
format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v8,!grepl("SI",encontrado_como_imputado)),ruc)), big.mark=","),";n= ",
format(nrow(dplyr::filter(Base_fiscalia_v8,!grepl("SI",encontrado_como_imputado))), big.mark=","),")")
#FILTER IF THE PATIENT RECIEVES FOR THE RELATIONSHIP AMONG THOSE THAT WERE OFFENDERS
# end of proceeding === SIMILAR TO Base_fiscalia_v9
a_tab7_lab_aft_d2<-
paste0("(p= ",format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v8, grepl("SI",encontrado_como_imputado)) %>%
dplyr::mutate(filter=dplyr::case_when(grepl("REPARATORIO|CONDICIONAL",toupper(agrupa_terminos)) & is.na(gls_mottermino)~1, grepl("REPARATORIO|SENTENCIA DEFINITIVA CONDENATORIA|240|MONIT", toupper(agrupa_terminos), ignore.case=F)~2, T~0))%>% dplyr::filter(filter==0),rut_enc_saf)),big.mark=","),"; RUCs= ",
format(nrow(dplyr::distinct(dplyr::filter(Base_fiscalia_v8, grepl("SI",encontrado_como_imputado)) %>%
dplyr::mutate(filter=dplyr::case_when(grepl("REPARATORIO|CONDICIONAL",toupper(agrupa_terminos)) & is.na(gls_mottermino)~1, grepl("REPARATORIO|SENTENCIA DEFINITIVA CONDENATORIA|240|MONIT", toupper(agrupa_terminos), ignore.case=F)~2, T~0))%>% dplyr::filter(filter==0),ruc)), big.mark=","),";n= ",
format(nrow(dplyr::filter(Base_fiscalia_v8, grepl("SI",encontrado_como_imputado)) %>%
dplyr::mutate(filter=dplyr::case_when(grepl("REPARATORIO|CONDICIONAL",toupper(agrupa_terminos)) & is.na(gls_mottermino)~1, grepl("REPARATORIO|SENTENCIA DEFINITIVA CONDENATORIA|240|MONIT", toupper(agrupa_terminos), ignore.case=F)~2, T~0))%>% dplyr::filter(filter==0)), big.mark=","),")")
CONS_TOP_2022_anti<-
# 107307
CONS_TOP%>%
dplyr::left_join(subset(dplyr::mutate(dplyr::group_by(Base_fiscalia_v11b_dic_2022, hash_key), hash_rn=row_number())%>% ungroup(), hash_rn==1), by= c("HASH_KEY" = "hash_key"))%>%
dplyr::mutate(fech_ap_top_num= as.numeric(as.Date(str_sub(as.character(lubridate::parse_date_time(Fecha.Aplicación.TOP, c("%Y-%m-%d"),exact=T)),1,10))))%>% #No parse failures
dplyr::select(HASH_KEY, fech_ap_top_num, Fecha.Aplicación.TOP, dateofbirth_imp, Hurto, Robo, Venta.Drogas, Riña, Total.VIF, Otro) %>%
dplyr::filter(!is.na(HASH_KEY)) %>%
dplyr::mutate_at(vars("Hurto", "Robo", "Venta.Drogas", "Riña", "Otro"), ~ifelse(.=="S",1,0)) %>%
dplyr::mutate(Total.VIF= ifelse(Total.VIF>0,1,0))%>%
dplyr::mutate(tot_off_top = base::rowSums(dplyr::select(.,c(Hurto, Robo, Venta.Drogas, Riña, Total.VIF, Otro)), na.rm = T)) %>%
dplyr::mutate(dateofbirth_imp_num= as.numeric(dateofbirth_imp),
fech_ap_top= lubridate::parse_date_time(Fecha.Aplicación.TOP, c("%Y-%m-%d"),exact=T),
edad_a_ap_top_num= lubridate::time_length(lubridate::interval(dateofbirth_imp, fech_ap_top),unit="years"),
edad_b_ap_top_num= (fech_ap_top_num-dateofbirth_imp_num)/365.25,
edad_a_ap_top_num_lim= edad_a_ap_top_num-(1/12),
edad_b_ap_top_num_lim= edad_b_ap_top_num-(1/12)) %>%
dplyr::select(-dateofbirth_imp, -dateofbirth_imp_num) %>%
dplyr::filter(!is.na(edad_a_ap_top_num)) %>%
dplyr::group_by(HASH_KEY, edad_a_ap_top_num) %>%
dplyr::slice(-1) %>%
dplyr::ungroup()
a_tab7_lab_aft_d25<-
paste0("(p= ",format(length(unique(CONS_TOP_2022_anti$HASH_KEY)),big.mark=","),"; n= ",
format(length(CONS_TOP_2022_anti$HASH_KEY), big.mark=","),")")
a_tab7_lab_aft_d3<-
paste0("(p= ",format(nrow(dplyr::distinct(dplyr::anti_join(CONS_C1_df_dup_SEP_2020, CONS_C1_df_dup_SEP_2020_22_d, by=c("hash_key","dup")),hash_key)),big.mark=","),"; n= ",
format(nrow(dplyr::anti_join(CONS_C1_df_dup_SEP_2020, CONS_C1_df_dup_SEP_2020_22_d, by=c("hash_key","dup"))), big.mark=","),")")
a_tab8_lab_aft_d <- paste0('Offenses previous\nto the first admission\n',"(p= ",format(length(unique(Base_fiscalia_v10b_dic_2022[,"hash_key"])),big.mark=","),"; RUCs= ",
format(length(unique(Base_fiscalia_v10b_dic_2022[,"caseid"])), big.mark=","),";\nn= ",
format(length(Base_fiscalia_v10b_dic_2022[,"caseid"]), big.mark=","),")")
a_tab9_lab_aft<- paste0('•Discard observations coded as victims rather than ofenders ',tab7_lab_aft_d1,'\\\\\\l•Discard observations depending on the values of the end of the proceedings among offenders ',tab7_lab_aft_d2,'\\\\\\l•Discard TOP applications with duplicated dates and user ',a_tab7_lab_aft_d25,'\\\\\\l•Get the first treatment of each user ',a_tab7_lab_aft_d3,'\\\\\\l')
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#// ON x.hash_key == y.id AND x.edad_al_egres_imp > y.age_offending_imp AND x.dup = 1"
library(DiagrammeR)
plot_merge_flowchart_excercise<-
grViz("digraph flowchart {
fontname='Comic Sans MS'
# node definitions with substituted label text
node [shape = rectangle,fontsize = 9]
tab1 [label = '@@1']
blank [label = '', width = 0.0001, height = 0.0001]
tab2 [label = '@@2',fontsize = 7]
tab3 [label = '@@3']
tab4 [label = '@@4',fontsize = 8]
blank2 [label = '', width = 0.0001, height = 0.0001]
tab5 [label = '@@5',fontsize = 7]
tab6 [label= '@@6']
blank3 [label = '', width = 0.0001, height = 0.0001]
blank4 [label = '', width = 0.0001, height = 0.0001]
blank5 [label = '', width = 0.0001, height = 0.0001]
blank6 [label = '', width = 0.0001, height = 0.0001]
tab7 [label = '@@7',fontsize = 7]
tab8 [label= '@@8']
# edge definitions with the node IDs
rankdir='TB'; rank= same; tab1 -> blank [arrowhead = none,label=' Data wrangling and normalization process',fontsize = 8];
rankdir='TB'; rank= same; tab1; tab3;
blank -> tab2;
subgraph {
rank = same; tab2; blank;
}
rankdir='TB'; rank= same; blank -> tab3;
tab4 -> blank2 [arrowhead = none,label=' Data wrangling and normalization process',fontsize = 8];
blank2 -> tab5
blank2 -> tab6
blank4 -> blank6 [arrowhead= none, label=' ⟖']
blank6 -> tab7
subgraph {
rank = same; blank6; tab7;
}
# bring_db1_a
blank6 -> tab8 [label= ' Merge records where discharge age is greater than age of offenses', fontsize = 8]
subgraph {
rank = same; tab5; blank2;
}
subgraph {
rank= same; tab3 -> blank3 -> blank4 -> blank5 -> tab6 [arrowhead= none]
}
}
subgraph {
rank = same; tab3; tab6;
}
subgraph {
rank = same; tab1; tab4;
}
subgraph {
rank = same; tab2; tab5;
}
subgraph {
rank = same; tab1; tab3;
rankdir=TB
rank=same
}
subgraph {
rank = same; tab4; tab6;
rankdir=TB
rank=same
}
[1]: a_tab1_lab_aft_d
[2]: a_tab2_lab_aft_d
[3]: a_tab3_lab_aft_d
[4]: a_tab4_lab_aft_d
[5]: a_tab5_lab_aft_d
[6]: a_tab6_lab_aft_d
[7]: a_tab9_lab_aft
[8]: a_tab8_lab_aft_d
", width = 1200,
height = 900)
DPI = 1200
WidthCM = 11
HeightCM = 8
plot_merge_flowchart_excerciselibrary(DiagrammeRsvg)
plot_merge_flowchart_excercise %>%
export_svg %>% charToRaw %>% rsvg::rsvg_pdf("./_figs/_flowchart_comm_DAD.pdf")
plot_merge_flowchart_excercise %>% DiagrammeRsvg::export_svg()%>%charToRaw %>% rsvg::rsvg(width = WidthCM *(DPI/2.54), height = HeightCM *(DPI/2.54)) %>% png::writePNG("./_figs/_flowchart_comm_DAD.png")
htmlwidgets::saveWidget(plot_merge_flowchart_excercise, "./_figs/_flowchart_comm_DAD.html")
webshot::webshot("./_figs/_flowchart_comm_DAD.html", "./_figs/_flowchart_comm_DAD.png",vwidth = 1200, vheight = 900, zoom = 3)
webshot::webshot("./_figs/_flowchart_comm_DAD.html", "./_figs/_flowchart_comm_DAD_alt.pdf")
a_tab10_lab_aft_d<- paste0("TOP Dataset \n(p= ", dplyr::distinct(CONS_TOP, HASH_KEY)%>% nrow()%>% formatC(big.mark = ","),";\nn= ",formatC(nrow(CONS_TOP),big.mark = ","),")")
#bring_db3
a_tab11_lab_aft_d1<-
paste0( '(p= ',format(nrow(dplyr::distinct(dplyr::group_by(Base_fiscalia_v9, id, caseid, end_type, fec_comision_simple, crime_code_c) %>% dplyr::slice(-1) %>% dplyr::ungroup(),id)), big.mark=","), '; RUCs= ', format(nrow(dplyr::distinct(dplyr::group_by(Base_fiscalia_v9, id, caseid, end_type, fec_comision_simple, crime_code_c) %>% dplyr::slice(-1) %>% dplyr::ungroup(),caseid)), big.mark=","), '; n= ', format(nrow(dplyr::group_by(Base_fiscalia_v9, id, caseid, end_type, fec_comision_simple, crime_code_c) %>% dplyr::slice(-1) %>% dplyr::ungroup()), big.mark=",") , ')'
)
message(
paste0("Patients in PO records with unique combination of ID, RUC, end type and date of comission of the offense and offense ", a_tab11_lab_aft_d1)
)
a_tab11_lab_aft_d<- paste0('•Count events such as shoplifting, theft, domestic violence, drug selling, fights, or related behavior in the last four weeks','\\\\\\l',
'•Get PO data with more than one record with the same case ID, sentence, date of offense and type of crime ', a_tab11_lab_aft_d1,'\\\\\\l')
a_tab12_lab_aft_d<-
paste0('Merged database\n(p= ',format(nrow(dplyr::distinct(Base_fiscalia_v13c_dic_2022_3,HASH_KEY)), big.mark=","), '; n= ', format(nrow(Base_fiscalia_v13c_dic_2022_3), big.mark=",") , ')'
)
plot_merge_flowchart_excercise2<-
grViz("digraph flowchart {
fontname='Comic Sans MS'
# edge definitions with the node IDs
# node definitions with substituted label text
node [shape = rectangle, fontsize = 9]
tab1 [label = '@@1', fontsize = 7]
tab2 [label = '@@2', fontsize = 7]
blank [label = '', width = 0.0001, height = 0.0001]
blanka [label = '', width = 0.0001, height = 0.0001]
blankb [label = '', width = 0.0001, height = 0.0001]
# bring_db2
subgraph {
rank = same; tab1 -> blanka -> blank -> blankb -> tab2 [arrowhead= none]
}
tab3 [label = '@@3',fontsize = 6]
blank2 [label = '', width = 0.0001, height = 0.0001]
blank -> blank2 [arrowhead= none, label=' ⟖']
subgraph {
rank = same; blank2 -> tab3
}
tab4 [label = '@@4',fontsize = 9]
blank2 -> tab4 [label=' Select records of offenses committed before the application of TOP but\n at least the last month previous to the application of TOP \\\\\\l',fontsize = 7];
}
[1]: a_tab8_lab_aft_d
[2]: a_tab10_lab_aft_d
[3]: a_tab11_lab_aft_d
[4]: a_tab12_lab_aft_d
[5]: ''
[6]: ''
[7]: ''
[8]: ''
", width = 1200,
height = 900)
plot_merge_flowchart_excercise2
plot_merge_flowchart_excercise2 %>%
export_svg %>% charToRaw %>% rsvg::rsvg_pdf("./_figs/_flowchart2_comm_DAD.pdf")
plot_merge_flowchart_excercise2 %>% DiagrammeRsvg::export_svg()%>%charToRaw %>% rsvg::rsvg(width = WidthCM *(DPI/2.54), height = HeightCM *(DPI/2.54)) %>% png::writePNG("./_figs/_flowchart2_comm_DAD.png")
htmlwidgets::saveWidget(plot_merge_flowchart_excercise2, "./_figs/_flowchart2_comm_DAD.html")
webshot::webshot("./_figs/_flowchart2_comm_DAD.html", "./_figs/_flowchart2_comm_DAD.png",vwidth = 1200, vheight = 900, zoom = 3)
webshot::webshot("./_figs/_flowchart2_comm_DAD.html", "./_figs/_flowchart2_comm_DAD_alt.pdf")
message(Sys.getenv("R_LIBS_USER"))
Sys.Date()
[1] "2023-02-20"
if (grepl("CISS Fondecyt",rstudioapi::getSourceEditorContext()$path)==T){
save.image("C:/Users/CISS Fondecyt/Mi unidad/Alvacast/SISTRAT 2022 (github)/14_alt.RData")
} else if (grepl("andre",rstudioapi::getSourceEditorContext()$path)==T){
save.image("C:/Users/andre/Desktop/SUD_CL/14_alt.RData")
} else if (grepl("E:",rstudioapi::getSourceEditorContext()$path)==T){
save.image("E:/Mi unidad/Alvacast/SISTRAT 2022 (github)/14_alt.RData")
} else {
save.image(paste0(sub("2019","2022",sub("SUD_CL","",path)),"14_alt.RData"))
}
Error in readRDS(responseFile): error al leer desde conexión
#load(paste0(dirname(rstudioapi::getSourceEditorContext()$path),"/14_alt.RData"))
sesion_info <- devtools::session_info()
dplyr::select(
tibble::as_tibble(sesion_info$packages),
c(package, loadedversion, source)
) %>%
DT::datatable(filter = 'top', colnames = c('Row number' =1,'Variable' = 2, 'Percentage'= 3),
caption = htmltools::tags$caption(
style = 'caption-side: top; text-align: left;',
'', htmltools::em('Packages')),
options=list(
initComplete = htmlwidgets::JS(
"function(settings, json) {",
"$(this.api().tables().body()).css({
'font-family': 'Helvetica Neue',
'font-size': '50%',
'code-inline-font-size': '15%',
'white-space': 'nowrap',
'line-height': '0.75em',
'min-height': '0.5em'
});",#;
"}")))
#put name of the file
file<- "fiscalia_ags_jan_2023_match_SENDA.dta"
export_lab_stata_merge<-
tibble::rownames_to_column(data.frame(Hmisc::label(dplyr::select(Base_fiscalia_v13, all_of(c("hash_key", cont_vars_desc, cat_vars_desc, cat_vars_desc_off, vars_desc, "fech_ing_num_1", "fech_egres_imp", "cut_com_del", "cut_fec_nac","offender_d", "comuna_residencia_cod_rec", "porc_pobr", "Clasificación"))))))%>% data.frame() %>%
dplyr::rename("code" = !!names(.[1]), "label" = !!names(.[2]))%>% data.frame()%>%
dplyr::mutate(first= "cap noi label variable")%>%
dplyr::mutate(final= paste0(first, " ",code,' "',label,'"'))%>%
dplyr::select(-code,-label,-first)%>%
dplyr::rename("*clear all"="final") %>%
rbind(paste0('cap noi save "', gsub('/', '\\', path, fixed=T),'\\',file,'", replace'))%>%
rbind(paste0('cap noi save "', gsub('/', '\\', path, fixed=T),'\\',file,'", replace'))
rbind(paste0('cap noi use "', gsub('/', '\\', path, fixed=T),'\\',file,'", clear'),export_lab_stata_merge) %>% knitr::kable("markdown")
write.table(rbind(paste0('cap noi use "', gsub('/', '\\', path, fixed=T),'\\',file,'", clear'),export_lab_stata_merge), file = paste0(path,"/_label_var_to_stata_jan2023.do"), sep = "",row.names = FALSE, quote = FALSE, fileEncoding="UTF-8")
*should be in the same folder of the .Rmd to work
cap noi do _label_var_to_stata_jan2023Error in running command st